raannakasturi commited on
Commit
dc5b905
·
verified ·
1 Parent(s): db84871

Upload 124 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +12 -0
  2. .gitignore +92 -0
  3. Dockerfile +7 -0
  4. LICENSE +22 -0
  5. Procfile +1 -0
  6. app.py +42 -0
  7. bibtex.py +420 -0
  8. citation.py +232 -0
  9. data/doaj_issns.json +0 -0
  10. data/doaj_titles.json +0 -0
  11. data/vogt.txt +0 -0
  12. enhanced_citation_style.py +24 -0
  13. pytest.ini +5 -0
  14. requirements.txt +16 -0
  15. runtime.txt +1 -0
  16. software.py +84 -0
  17. static/img/badges/babel.png +0 -0
  18. static/img/badges/bff.png +0 -0
  19. static/img/badges/big_hit.png +0 -0
  20. static/img/badges/big_in_japan.png +0 -0
  21. static/img/badges/buzz.psd +3 -0
  22. static/img/badges/clean_sweep.png +0 -0
  23. static/img/badges/controversial.png +0 -0
  24. static/img/badges/deep_interest.png +0 -0
  25. static/img/badges/depsy.png +0 -0
  26. static/img/badges/famous_follower.png +0 -0
  27. static/img/badges/first_steps.png +0 -0
  28. static/img/badges/fun.psd +3 -0
  29. static/img/badges/gender_balance.png +0 -0
  30. static/img/badges/geo.psd +3 -0
  31. static/img/badges/global_reach.png +0 -0
  32. static/img/badges/global_reach_wrong_color.png +0 -0
  33. static/img/badges/global_south.png +0 -0
  34. static/img/badges/gold_star.png +0 -0
  35. static/img/badges/hot_streak.png +0 -0
  36. static/img/badges/hot_streak_wrong_color.png +0 -0
  37. static/img/badges/impressions.png +0 -0
  38. static/img/badges/influence.psd +3 -0
  39. static/img/badges/ivory_tower.png +0 -0
  40. static/img/badges/megafan.png +0 -0
  41. static/img/badges/noun_16718.png +0 -0
  42. static/img/badges/oa_advocate.png +0 -0
  43. static/img/badges/oa_early_adopter.png +0 -0
  44. static/img/badges/open_science_triathlete.png +0 -0
  45. static/img/badges/open_sesame.png +0 -0
  46. static/img/badges/openness.png +0 -0
  47. static/img/badges/openness.psd +3 -0
  48. static/img/badges/reading_level.png +0 -0
  49. static/img/badges/rick_roll.png +0 -0
  50. static/img/badges/rickroll.png +0 -0
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ static/img/badges/buzz.psd filter=lfs diff=lfs merge=lfs -text
37
+ static/img/badges/fun.psd filter=lfs diff=lfs merge=lfs -text
38
+ static/img/badges/geo.psd filter=lfs diff=lfs merge=lfs -text
39
+ static/img/badges/influence.psd filter=lfs diff=lfs merge=lfs -text
40
+ static/img/badges/openness.psd filter=lfs diff=lfs merge=lfs -text
41
+ static/img/badges/timeline.psd filter=lfs diff=lfs merge=lfs -text
42
+ static/img/gif/orcid-import-scopus-from-nothing.gif filter=lfs diff=lfs merge=lfs -text
43
+ static/img/gif/orcid-import-scopus.gif filter=lfs diff=lfs merge=lfs -text
44
+ static/img/gif/orcid-set-public.gif filter=lfs diff=lfs merge=lfs -text
45
+ static/img/heather.jpg filter=lfs diff=lfs merge=lfs -text
46
+ static/img/layout[[:space:]]ideas.psd filter=lfs diff=lfs merge=lfs -text
47
+ static/img/science.psd filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+
5
+ # C extensions
6
+ *.so
7
+
8
+ # Distribution / packaging
9
+ .Python
10
+ env/
11
+ build/
12
+ develop-eggs/
13
+
14
+ # jason commented this one out because it breaks static/dist
15
+ # dist/
16
+
17
+ local_data/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ *.temp*
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *,cover
50
+
51
+ # Translations
52
+ *.mo
53
+ *.pot
54
+
55
+ # Django stuff:
56
+ *.log
57
+
58
+ # Sphinx documentation
59
+ docs/_build/
60
+
61
+ # PyBuilder
62
+ target/
63
+
64
+ # venv
65
+ venv/
66
+
67
+ # IDE (added by jason)
68
+ .idea/
69
+
70
+ # sqlite (added by heather)
71
+ *.sqlite
72
+
73
+ # env setup (added by heather)
74
+ tng-env.sh
75
+
76
+ # node modules used in Grunt
77
+ static/node_modules
78
+
79
+ # CodeKit. Cannot seem to make this work :(
80
+ *codekit*
81
+ codekit*
82
+ codekit-config.json
83
+ static/codekit-config.json
84
+
85
+ # for markdown previews
86
+ README_cache
87
+
88
+ # local environment files (added by heather)
89
+ *.env
90
+
91
+ # raw doaj files (added by heather)
92
+ data/doaj_20*.csv
Dockerfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ FROM python:3.8-slim-buster
2
+ WORKDIR /app
3
+ COPY requirements.txt requirements.txt
4
+ RUN pip install -r requirements.txt
5
+ COPY . .
6
+ EXPOSE 8000
7
+ CMD ["gunicorn" , "--bind", "0.0.0.0:8000", "views:app", "-w", "3"]
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 OurResearch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn views:app -w 3 --timeout 60 --reload
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import sys
4
+
5
+ from flask import Flask
6
+ import requests
7
+ import requests_cache
8
+ import sentry_sdk
9
+ from sentry_sdk.integrations.flask import FlaskIntegration
10
+
11
# Cache outgoing HTTP responses for one day (expire_after is given in seconds),
# then empty the cache straight away so the process starts from a clean slate.
requests_cache.install_cache("my_requests_cache", expire_after=24 * 60 * 60)
requests_cache.clear()

# set up logging
# see http://wiki.pylonshq.com/display/pylonscookbook/Alternative+logging+configuration
logging.basicConfig(
    format="%(name)s - %(message)s",
    level=logging.DEBUG,
    stream=sys.stdout,
)
logger = logging.getLogger("citeas")

# Chatty third-party loggers that are capped at WARNING below.
libraries_to_mum = [
    "requests.packages.urllib3",
    "requests.packages.urllib3.connectionpool",
    "requests_oauthlib",
    "urllib3.connectionpool",
    "oauthlib",
    "citeproc",
]

for library_name in libraries_to_mum:
    quiet_logger = logging.getLogger(library_name)
    quiet_logger.setLevel(logging.WARNING)
    quiet_logger.propagate = True

requests.packages.urllib3.disable_warnings()

# error reporting with sentry; the DSN is read from the SENTRY_DSN env variable
sentry_sdk.init(
    dsn=os.environ.get("SENTRY_DSN"),
    integrations=[FlaskIntegration()],
)

app = Flask(__name__)
bibtex.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ from citeproc.py2compat import *
5
+
6
+ # copied from https://github.com/brechtm/citeproc-py/blob/master/citeproc/source/bibtex/bibtex.py
7
+ # then modified to fix bugs. search for "hap" to see mods
8
+
9
+ import re
10
+ import unicodedata
11
+
12
+ from warnings import warn
13
+
14
+ from citeproc.types import (
15
+ ARTICLE,
16
+ ARTICLE_JOURNAL,
17
+ BOOK,
18
+ CHAPTER,
19
+ MANUSCRIPT,
20
+ PAMPHLET,
21
+ PAPER_CONFERENCE,
22
+ REPORT,
23
+ THESIS,
24
+ )
25
+ from citeproc.string import String, MixedString, NoCase
26
+ from citeproc.source import BibliographySource, Reference, Name, Date, DateRange
27
+ from citeproc.source.bibtex.bibparse import BibTeXParser
28
+ from citeproc.source.bibtex.latex import parse_latex
29
+ from citeproc.source.bibtex.latex.macro import NewCommand, Macro
30
+
31
+
32
class BibTeX(BibliographySource):
    """Bibliography source that turns the entries of a BibTeX file into
    citeproc ``Reference`` objects.

    Copied from citeproc-py and then modified; search for "hap" to see the
    local modifications.
    """

    # BibTeX field name -> CSL variable name.  Fields that are not listed are
    # passed through under their BibTeX name (see _bibtex_to_csl).
    fields = {
        "address": "publisher_place",
        "annote": "annote",
        "author": "author",
        "booktitle": "container_title",
        "chapter": "chapter_number",
        "edition": "edition",
        "editor": "editor",
        # 'howpublished': None,
        # 'institution': None,
        "journal": "container_title",
        # 'month': None,
        "note": "note",
        "number": "issue",
        # 'organization': None,
        "pages": "page",
        "publisher": "publisher",
        # 'school': None,
        "series": "collection_title",
        "title": "title",
        # 'type': None,
        # 'year': None,
        "volume": "volume",
        # hap added doi and url
        "doi": "doi",
        "url": "url",
        # non-standard fields
        "isbn": "ISBN",
        "issn": "ISSN",
    }

    # BibTeX entry type -> CSL item type.
    types = {  # standard entry types
        "article": ARTICLE_JOURNAL,
        "book": BOOK,
        "booklet": PAMPHLET,
        "conference": PAPER_CONFERENCE,
        "inbook": CHAPTER,
        "incollection": ARTICLE_JOURNAL,
        "inproceedings": PAPER_CONFERENCE,
        "manual": BOOK,
        "mastersthesis": THESIS,
        "misc": ARTICLE,
        "phdthesis": THESIS,
        "proceedings": BOOK,
        "techreport": REPORT,
        "unpublished": MANUSCRIPT,
        # non-standard entry types
        "thesis": THESIS,
        "report": REPORT,
    }

    def __init__(self, filename, encoding="ascii"):
        """Parse ``filename`` with ``BibTeXParser`` and register one reference
        per entry.

        ``encoding`` is stored on the parser object after it is constructed.
        """
        bibtex_database = BibTeXParser(filename)
        bibtex_database.encoding = encoding
        self.preamble_macros = {}
        # Parsing the preamble fills self.preamble_macros through NewCommand.
        parse_latex(
            bibtex_database.preamble,
            {
                "newcommand": NewCommand(self.preamble_macros),
                "mbox": Macro(1, "{0}"),
                "cite": Macro(1, "CITE({0})"),
            },
        )
        for key, entry in bibtex_database.items():
            self.add(self.create_reference(key, entry))

    def _bibtex_to_csl(self, bibtex_entry):
        """Convert the fields of one BibTeX entry into a dict of CSL fields.

        Field names found in ``self.fields`` are renamed; all others keep
        their BibTeX name.  Values are converted per field: number/volume to
        int when possible, pages to a single-hyphen range, author/editor to a
        list of ``Name`` objects, everything else through ``_parse_string``.
        """
        csl_dict = {}
        for field, value in bibtex_entry.items():
            try:
                value = value.strip()
            except AttributeError:
                pass  # not a string; use the value as it is

            csl_field = self.fields.get(field, field)

            if field in ("number", "volume"):
                try:
                    value = int(value)
                except (ValueError, TypeError):
                    pass  # keep non-numeric issue/volume labels unchanged
            elif field == "pages":
                value = self._bibtex_to_csl_pages(value)
            elif field in ("author", "editor"):
                try:
                    value = self._parse_author(value)
                except RuntimeError:
                    # best effort: keep the raw string when the names cannot
                    # be parsed
                    pass
            else:
                try:
                    value = self._parse_string(value)
                except TypeError:
                    value = str(value)

            csl_dict[csl_field] = value
        return csl_dict

    @staticmethod
    def _bibtex_to_csl_pages(value):
        """Normalise a BibTeX ``pages`` value to CSL form.

        Spaces are removed.  A range is written with a single hyphen between
        the part before the first hyphen run and everything after it, so
        ``"1--5"`` and ``"1-5"`` both give ``"1-5"``.  Values that contain
        further hyphens (for example ``"S1-12-S1-20"``) no longer raise; the
        remainder is kept as the end of the range.  A value without a hyphen
        loses one trailing ``"+"``.
        """
        value = value.replace(" ", "")
        if "-" in value:
            first, _, rest = value.partition("-")
            # Drop the remaining hyphens of a "--" / "---" separator.
            last = rest.lstrip("-")
            pages = "-".join((first, last))
        else:
            pages = value[:-1] if value.endswith("+") else value
        return pages

    def _bibtex_to_csl_date(self, bibtex_entry):
        """Build a ``Date`` or ``DateRange`` from the entry's ``year`` field.

        Returns ``None`` when the entry has no ``year``.
        """
        # hap commented out the month handling on feb 18, 2017 because it was
        # causing bugs; the month is ignored and only the year is used.
        begin_dict, end_dict = {}, {}

        if "year" in bibtex_entry:
            begin_dict["year"], end_dict["year"] = self._parse_year(
                bibtex_entry["year"]
            )
        if not begin_dict:
            return None
        if begin_dict == end_dict:
            return Date(**begin_dict)
        return DateRange(begin=Date(**begin_dict), end=Date(**end_dict))

    def _parse_year(self, year):
        """Return ``(begin_year, end_year)`` for a BibTeX ``year`` value.

        A value containing an en dash is treated as a range; an abbreviated
        end year borrows its missing leading digits from the begin year.
        Any other value must be convertible with ``int``.
        """
        try:
            year_str = parse_latex(year, self.preamble_macros)
        except TypeError:
            year_str = str(year)
        if EN_DASH in year_str:
            # NOTE(review): this branch returns strings while the single-year
            # branch returns ints -- confirm Date accepts both.
            begin_year, end_year = year_str.split(EN_DASH)
            begin_len, end_len = len(begin_year), len(end_year)
            if end_len < begin_len:
                end_year = begin_year[: begin_len - end_len] + end_year
        else:
            begin_year = end_year = int(year_str)
        return begin_year, end_year

    MONTHS = (
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
    )
    # Raw strings: "\d" and "\w" are invalid escapes in ordinary literals.
    RE_DAY = r"(?P<day>\d+)"
    RE_MONTH = r"(?P<month>\w+)"

    @staticmethod
    def _parse_month(month):
        """Parse a BibTeX ``month`` value into ``(begin, end)`` dicts.

        Both dicts get a numeric ``month`` and, when the value also carries a
        day number, a ``day``.  Not called by ``_bibtex_to_csl_date`` since
        the month handling was disabled there.
        """

        def month_name_to_index(name):
            try:
                return BibTeX.MONTHS.index(name[:3].lower()) + 1
            except ValueError:
                return int(name)

        begin = {}
        end = {}
        month = month.strip()
        month = month.replace(", ", "-")
        if month.isdecimal():
            begin["month"] = end["month"] = month
        elif month.replace("-", "").isalpha():
            if "-" in month:
                begin["month"], end["month"] = month.split("-")
            else:
                begin["month"] = end["month"] = month
        else:
            # "<day> <month>" first, then "<month> <day>"
            m = re.match(BibTeX.RE_DAY + "[ ~]*" + BibTeX.RE_MONTH, month)
            if m is None:
                m = re.match(BibTeX.RE_MONTH + "[ ~]*" + BibTeX.RE_DAY, month)
            begin["day"] = end["day"] = int(m.group("day"))
            begin["month"] = end["month"] = m.group("month")
        begin["month"] = month_name_to_index(begin["month"])
        end["month"] = month_name_to_index(end["month"])
        return begin, end

    def _parse_string(self, title):
        """Convert a BibTeX string into a ``MixedString``.

        Text inside a top-level ``{...}`` group becomes ``NoCase`` unless it
        starts with a backslash; all other text becomes ``String``.  Raises
        ``SyntaxError`` when the braces do not balance.
        """

        def make_string(string, top_level_group=False):
            unlatexed = parse_latex(string, self.preamble_macros)
            fixed_case = top_level_group and not string.startswith("\\")
            string_cls = NoCase if fixed_case else String
            return string_cls(unlatexed)

        title = str(title)
        # NOTE(review): newlines are removed without inserting a space, so
        # words separated only by a newline are joined -- confirm intended.
        title = title.replace("\n", "")
        title = " ".join(title.split())

        output = MixedString()
        level = 0
        string = ""
        for char in title:
            if char == "{":
                if level == 0:
                    if string:
                        output += make_string(string)
                        string = ""
                level += 1
            elif char == "}":
                level -= 1
                if level == 0:
                    output += make_string(string, True)
                    string = ""
            else:
                string += char
        if level != 0:
            raise SyntaxError('Non-matching braces in "{}"'.format(title))
        if string:
            output += make_string(string)
        return output

    def _parse_author(self, authors):
        """Parse an ``and``-separated BibTeX name list into ``Name`` objects."""
        csl_authors = []
        for author in split_names(authors):
            first, von, last, jr = parse_name(author)
            csl_parts = {}
            for part, csl_label in [
                (first, "given"),
                (von, "non-dropping-particle"),
                (last, "family"),
                (jr, "suffix"),
            ]:
                if part is not None:
                    csl_parts[csl_label] = parse_latex(part, self.preamble_macros)
            name = Name(**csl_parts)
            csl_authors.append(name)
        return csl_authors

    def create_reference(self, key, bibtex_entry):
        """Create the ``Reference`` for one BibTeX entry.

        An entry type that is missing from ``self.types`` is reported with a
        warning and handled like ``misc`` instead of raising ``KeyError``.
        """
        document_type = bibtex_entry.document_type
        csl_type = self.types.get(document_type)
        if csl_type is None:
            warn(
                "Unsupported BibTeX entry type '{}'; treating it as "
                "'misc'".format(document_type)
            )
            csl_type = self.types["misc"]
        csl_fields = self._bibtex_to_csl(bibtex_entry)
        csl_date = self._bibtex_to_csl_date(bibtex_entry)
        if csl_date:
            csl_fields["issued"] = csl_date
        ref = Reference(key, csl_type, **csl_fields)
        return ref
287
+
288
+
289
+ # BibTeX name handling
290
+ #
291
+ # references
292
+ # - BibTeXing by Oren Patashnik (Feb 8, 1988), 4. Helpful Hints, item 18
293
+ # (BibTeX 0.99d - http://www.ctan.org/tex-archive/biblio/bibtex/base/btxdoc.pdf)
294
+ # - A summary of BibTex by Xavier Décoret
295
+ # (http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html)
296
+ # - Tame the BeaST by Nicolas Markey
297
+ # (http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf)
298
+
299
AND = " and "


def split_names(string):
    """Split a string of names separated by ' and ' into a list of names.

    A separator is only recognised outside braces, so
    ``"{Barnes and Noble} and Smith, J"`` gives two names.  An empty string
    gives an empty list.
    """
    brace_level = 0
    names = []
    last_index = 0
    for i, char in enumerate(string):
        # startswith(AND, i) tests in place; slicing string[i:] would copy
        # the rest of the string for every character.
        if brace_level == 0 and string.startswith(AND, i):
            names.append(string[last_index:i])
            last_index = i + len(AND)
        elif char == "{":
            brace_level += 1
        elif char == "}":
            brace_level -= 1
    last_name = string[last_index:]
    if last_name:
        names.append(last_name)
    return names
320
+
321
+
322
def parse_name(name):
    """Parse a BibTeX name string and split it into First, von, Last and Jr
    parts.

    Returns a ``(first, von, last, jr)`` tuple of strings; ``first``, ``von``
    and ``jr`` are ``None`` when empty.  The layout is chosen by the number
    of comma-separated parts:

    - one part:    ``First von Last``
    - two parts:   ``von Last, First``
    - three parts: ``von Last, Jr, First``

    A name with more than two commas used to fail with an unbound local
    variable; every part after the second comma is now folded into First.
    """
    parts = split_name(name)
    if len(parts) == 1:  # First von Last
        (first_von_last,) = parts
        index = 0
        first, jr = [], []
        # Leading capitalized (or case-less) words form First; the final
        # word is always kept for Last.
        for word in first_von_last[:-1]:
            if is_capitalized(word) not in (True, None):
                break
            first.append(word)
            index += 1
        von_last = first_von_last[index:]
    elif len(parts) == 2:  # von Last, First
        jr = []
        von_last, first = parts
    else:  # von Last, Jr, First (extra comma-separated parts join First)
        von_last, jr = parts[0], parts[1]
        first = [word for part in parts[2:] for word in part]
    von, last = split_von_last(von_last)
    join = " ".join
    return join(first) or None, join(von) or None, join(last), join(jr) or None
345
+
346
+
347
def split_name(name):
    """Split a name into parts delimited by commas (at brace level 0), and
    each part into words.

    Spaces and tabs separate words; inside braces, spaces, tabs and commas
    are ordinary characters of the current word.

    Returns a list of lists of words.
    """
    depth = 0
    all_parts = []
    words = []
    token = ""
    for ch in name:
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
        elif depth == 0 and ch in " \t,":
            # A separator outside braces ends the current word ...
            if token:
                words.append(token)
                token = ""
            # ... and a comma also ends the current part.
            if ch == ",":
                all_parts.append(words)
                words = []
            continue
        token += ch
    if token:
        words.append(token)
    all_parts.append(words)
    return all_parts
376
+
377
+
378
def is_capitalized(string):
    """Check if a BibTeX substring is capitalized.

    The answer is taken from the first alphabetic character that is either
    outside braces or inside a "special character" (a top-level brace group
    that starts with a backslash).

    A string can be "case-less", in which case `None` is returned.  The
    empty string is case-less as well.
    """
    if not string:
        # Nothing to inspect; handled here so the result does not depend on
        # how lookahead_iter treats an empty input.
        return None
    brace_level = 0
    special_char = False
    for char, next_char in lookahead_iter(string):
        if (brace_level == 0 or special_char) and char.isalpha():
            return char.isupper()
        elif char == "{":
            brace_level += 1
            if brace_level == 1 and next_char == "\\":
                special_char = True
        elif char == "}":
            brace_level -= 1
            if brace_level == 0:
                special_char = False
    return None  # case-less
397
+
398
+
399
def split_von_last(words):
    """Split the words of a "von Last" name into (von, Last) lists.

    A von part is only present when there is more than one word and the
    first word is lower-case.  It then extends up to and including the last
    lower-case word that precedes the final word.
    """
    if len(words) < 2 or is_capitalized(words[0]) is not False:
        return [], words
    # Walk backwards from the word before the final one, looking for the
    # last lower-case word.
    split_at = len(words) - 1
    while split_at > 0:
        if is_capitalized(words[split_at - 1]) is False:
            return words[:split_at], words[split_at:]
        split_at -= 1
    return [], words
406
+
407
+
408
def lookahead_iter(iterable):
    """Iterator that also yields the next item along with each item. The next
    item is `None` when yielding the last item.

    An empty iterable yields nothing.  (A bare ``next()`` inside a generator
    lets ``StopIteration`` escape, which Python 3.7+ turns into a
    ``RuntimeError``.)
    """
    items = iter(iterable)
    try:
        item = next(items)
    except StopIteration:
        return
    for next_item in items:
        yield item, next_item
        item = next_item
    yield item, None
418
+
419
+
420
# U+2013 EN DASH, the separator recognised in year ranges.
EN_DASH = "\N{EN DASH}"
citation.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html
2
+
3
+ from citeproc import formatter, CitationStylesBibliography, Citation, CitationItem
4
+ from citeproc.source.json import CiteProcJSON
5
+
6
+ from enhanced_citation_style import EnhancedCitationStyle, get_style_name
7
+ from steps.utils import author_name_as_dict
8
+
9
+
10
def display_citation(bibtex_metadata, bib_stylename, formatter=formatter.html):
    """Render the bibliography entry for "ITEM-1" in the given CSL style.

    Args:
        bibtex_metadata: bibliography source that contains the "ITEM-1"
            reference (see get_bib_source_from_dict).
        bib_stylename: CSL style name, e.g. plos, apa, pnas, nature, bmj,
            harvard1.  Full list:
            https://github.com/citation-style-language/styles
        formatter: citeproc formatter used for the output; HTML by default.

    Returns:
        The formatted citation with HTML entities unescaped.
    """
    bib_style = EnhancedCitationStyle(bib_stylename)
    bibliography = CitationStylesBibliography(bib_style, bibtex_metadata, formatter)
    citation = Citation([CitationItem("ITEM-1")])
    bibliography.register(citation)

    # The first (and only) bibliography entry is a sequence of text
    # fragments; one join is enough to flatten it.
    citation_text = "".join(bibliography.bibliography()[0])

    # NOTE(review): the indentation of this function was lost in the text
    # this edit was written against; the clean-up steps are nested under the
    # APA branch here -- confirm against the repository.
    if bib_stylename == "apa":
        # strip extra periods and spaces that can occur in APA format
        citation_text = citation_text.replace("..", ".")
        citation_text = citation_text.replace("  ", " ")

        citation_text = citation_text.strip()

        # strip leading comma
        if citation_text.startswith(","):
            citation_text = citation_text.lstrip(",").strip()

        citation_text = strip_duplicate_apa_title(bibtex_metadata, citation_text)

    citation_text = html.unescape(citation_text)
    return citation_text
39
+
40
+
41
def get_author_list(data_author):
    """Return a normalised copy of a list of author dicts.

    Each result dict starts with a ``family`` default when the input has
    none (the input's ``name`` if present, otherwise ``""``).  The input's
    keys are then copied in order, except that a ``literal`` key replaces
    everything collected so far with ``author_name_as_dict(<literal>)``.
    """
    normalized = []
    for raw_author in data_author:
        if "family" in raw_author:
            entry = {}
        else:
            entry = {"family": raw_author.get("name", "")}
        for key in raw_author:
            if key != "literal":
                entry[key] = raw_author[key]
                continue
            # A literal name is split into its parts by the helper.
            entry = author_name_as_dict(raw_author[key])
        normalized.append(entry)
    return normalized
57
+
58
+
59
def build_bibtex_author_list(authors):
    """Build the value of a BibTeX ``author`` field from a list of dicts.

    Each author is written as "Family, Given" and authors are joined with
    " and ".  An author with only one of the two parts is written as that
    part alone (no dangling ", "), and an author with neither part is
    skipped so that no empty name ends up between two " and " separators.
    """
    names = []
    for author in authors:
        # Keep only the parts that are present, family first.
        parts = [author.get("family"), author.get("given")]
        parts = [str(part) for part in parts if part]
        if parts:
            names.append(", ".join(parts))
    return " and ".join(names)
72
+
73
+
74
def bibtex_pages_format(pages):
    """Format a page range for BibTeX, which separates pages with "--".

    Any run of hyphens, en dashes or em dashes (with optional surrounding
    whitespace) becomes a single "--", so "12-15", "12--15" and "12–15" all
    give "12--15".  A missing value (``None`` or empty) gives "".
    """
    import re  # local import: this module does not import re at the top

    if not pages:
        return ""
    return re.sub(r"\s*[-\u2013\u2014]+\s*", "--", str(pages))
76
+
77
+
78
def get_bib_source_from_dict(data):
    """Clean up a CSL-JSON style metadata dict and wrap it in CiteProcJSON.

    ``data`` is modified in place:

    - ``id`` is set to "ITEM-1";
    - ``author`` is normalised with get_author_list;
    - ``type`` defaults to "misc";
    - unless the type is "software", a ``title`` / ``container-title``
      string that is more than 75% upper-case is converted to title case;
    - a ``page`` of "-", the ``bibtex`` key, and an ``issued`` date with no
      usable year are removed.

    Returns:
        A ``CiteProcJSON`` source holding the single cleaned item.
    """
    data["id"] = "ITEM-1"

    if "author" in data:
        data["author"] = get_author_list(data["author"])

    if "type" not in data:
        data["type"] = "misc"

    if data["type"] != "software":
        # Fixed key list: avoids writing to the dict while iterating it.
        for key in ("title", "container-title"):
            val = data.get(key)
            # Only plain strings can be title-cased.
            if val and isinstance(val, str):
                num_upper = sum(1 for c in val if c.isupper())
                if num_upper > 0.75 * len(val):
                    data[key] = val.title()

    if "page" in data and data["page"] == "-":
        del data["page"]

    if "bibtex" in data:
        del data["bibtex"]

    if "issued" in data:
        try:
            first_date_part = data["issued"]["date-parts"][0][0]
        except (KeyError, IndexError, TypeError):
            # Malformed or empty date: treat it like a missing year.
            first_date_part = None
        if first_date_part is None:
            del data["issued"]

    bib_source = CiteProcJSON([data])

    return bib_source
107
+
108
+
109
def strip_duplicate_apa_title(bibtex_metadata, citation_text):
    """Remove the first copy of the title when it occurs exactly twice.

    Nothing is changed when the "item-1" entry or its title is missing, or
    when the citation contains "Retrieved from https://github.com".  After
    the first copy is removed, one leading period and any leading
    whitespace are stripped.

    Args:
        bibtex_metadata: mapping whose "item-1" entry holds the reference.
        citation_text: the formatted citation.

    Returns:
        The citation text, possibly without its first title occurrence.
    """
    item = bibtex_metadata.get("item-1")
    title = item.get("title") if item is not None else None
    if not title or "Retrieved from https://github.com" in citation_text:
        return citation_text

    # Flatten a multi-part title and collapse doubled spaces.
    title = "".join(title).replace("  ", " ")
    # NOTE(review): the nesting below was reconstructed from text that had
    # lost its indentation -- confirm against the repository.
    if citation_text.count(title) == 2:
        citation_text = citation_text.replace(title, "", 1)
        if citation_text.startswith("."):
            citation_text = citation_text.replace(".", "", 1)
        citation_text = citation_text.lstrip()
    return citation_text
120
+
121
+
122
def citations(bibtex_metadata):
    """Format the reference in every supported citation style.

    Returns a list with one dict per style, holding the style's short name,
    the formatted citation and the style's full name.
    """
    # full list of possible citation formats is here:
    # https://github.com/citation-style-language/styles
    style_names = (
        "apa",
        "harvard1",
        "nature",
        "modern-language-association-with-url",
        "chicago-author-date",
        "vancouver",
    )
    return [
        {
            "style_shortname": style_name,
            "citation": display_citation(bibtex_metadata, style_name),
            "style_fullname": get_style_name(style_name),
        }
        for style_name in style_names
    ]
140
+
141
+
142
def _export_author_name(author):
    """Return "Family, Given" for an author dict, leaving out missing parts."""
    parts = [author.get("family", ""), author.get("given", "")]
    return ", ".join(str(part) for part in parts if part)


def _csv_row(cells):
    """Serialise one CSV row without a trailing line terminator."""
    import csv
    import io

    terminator = "\r\n"
    buffer = io.StringIO()
    csv.writer(buffer, lineterminator=terminator).writerow(cells)
    return buffer.getvalue()[: -len(terminator)]


def export_contents(export_type, metadata_dict):
    """Serialise ``metadata_dict`` for a reference manager.

    Args:
        export_type: one of "csv", "ris", "enw" or "bibtex".
        metadata_dict: metadata with keys such as ``title``,
            ``container-title``, ``volume``, ``issue``, ``page``, ``year``,
            ``publisher`` and ``author`` (a list of dicts with ``family``
            and ``given``).

    Returns:
        The export as a string, or ``None`` for an unknown ``export_type``.
    """
    if export_type == "csv":
        items = list(metadata_dict.items())
        # The csv module quotes values that contain commas, quotes or line
        # breaks, so such values can no longer break the column layout.
        header_row = _csv_row([name for (name, _value) in items])
        value_row = _csv_row([str(value) for (_name, value) in items])
        return "{}\n{}".format(header_row, value_row)

    if export_type == "ris":
        response_list = [
            ("TY", "JOUR"),
            ("T1", metadata_dict.get("title", "")),
            ("JO", metadata_dict.get("container-title", "")),
            ("VL", metadata_dict.get("volume", "")),
            ("IS", metadata_dict.get("issue", "")),
            ("SP", metadata_dict.get("page", "")),
            # PY is the RIS tag for the publication year.
            ("PY", metadata_dict.get("year", "")),
            ("PB", metadata_dict.get("publisher", "")),
        ]
        for author in metadata_dict.get("author", []):
            response_list.append(("A1", _export_author_name(author)))
        # RIS lines are "<TAG>  - <value>": two spaces, hyphen, one space.
        response = "\n".join("{}  - {}".format(k, v) for (k, v) in response_list)
        response += "\nER  - "
        return response

    if export_type == "enw":
        response_list = [
            # %0 is the reference-type tag and opens the record.
            ("%0", "Journal Article"),
            ("%T", metadata_dict.get("title", "")),
            ("%J", metadata_dict.get("container-title", "")),
            ("%V", metadata_dict.get("volume", "")),
            ("%N", metadata_dict.get("issue", "")),
            ("%P", metadata_dict.get("page", "")),
            ("%D", metadata_dict.get("year", "")),
            ("%I", metadata_dict.get("publisher", "")),
        ]
        for author in metadata_dict.get("author", []):
            response_list.append(("%A", _export_author_name(author)))
        return "\n".join("{} {}".format(k, v) for (k, v) in response_list)

    if export_type == "bibtex":
        entry_type = metadata_dict.get("type")
        # NOTE(review): the type is written as-is; values that are not
        # BibTeX entry types (e.g. CSL's "article-journal") are not mapped.
        response = "@" + (entry_type or "article") + "{ITEM1, "

        response_list = [("title", metadata_dict.get("title", ""))]

        # handle book type differently
        if entry_type == "book":
            isbn = metadata_dict.get("isbn") or metadata_dict.get("ISBN", "")
            response_list.append(("isbn", isbn))
        else:
            if entry_type == "software":
                response_list.append(("url", metadata_dict.get("URL", "")))
            # The other exports read "issue"; fall back to it when there is
            # no "number" key.
            number = metadata_dict.get("number") or metadata_dict.get("issue", "")
            response_list.append(("journal", metadata_dict.get("container-title", "")))
            response_list.append(("volume", metadata_dict.get("volume", "")))
            response_list.append(("number", number))

        # "or ''" guards against a page key that is present but None.
        response_list.append(
            ("pages", bibtex_pages_format(metadata_dict.get("page") or ""))
        )
        response_list.append(("year", metadata_dict.get("year", "")))
        response_list.append(("publisher", metadata_dict.get("publisher", "")))
        author_list = build_bibtex_author_list(metadata_dict.get("author", []))
        response_list.append(("author", author_list))

        response += ",\n".join("{}={{{}}}".format(k, v) for (k, v) in response_list)
        response += "}"

        return response

    return None
222
+
223
+
224
def reference_manager_exports(metadata_dict):
    """Export the metadata in every supported reference-manager format.

    Returns a list of dicts, one per format, each holding the format name
    and the serialised export.
    """
    export_names = ("csv", "enw", "ris", "bibtex")
    return [
        {
            "export_name": export_name,
            "export": export_contents(export_name, metadata_dict),
        }
        for export_name in export_names
    ]
data/doaj_issns.json ADDED
The diff for this file is too large to render. See raw diff
 
data/doaj_titles.json ADDED
The diff for this file is too large to render. See raw diff
 
data/vogt.txt ADDED
The diff for this file is too large to render. See raw diff
 
enhanced_citation_style.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from citeproc import CitationStylesStyle
2
+ from citeproc_styles import get_style_filepath
3
+
4
+
5
class EnhancedCitationStyle(CitationStylesStyle):
    """CitationStylesStyle that remembers its style file path and exposes
    the style's title through ``name``."""

    def __init__(self, bib_stylename):
        """Load the CSL style called ``bib_stylename`` without validation.

        valid style names: plos, apa, pnas, nature, bmj, harvard1
        full list is here: https://github.com/citation-style-language/styles
        """
        self.style_path = get_style_filepath(bib_stylename)
        super(EnhancedCitationStyle, self).__init__(self.style_path, validate=False)

    @property
    def name(self):
        """Title from the style's ``<info>`` block.

        Falls back to the style file path when the style has no ``<info>``
        element, no ``<title>`` element, or an empty title.
        """
        info = self.root.find("{http://purl.org/net/xbiblio/csl}info")
        if info is not None:
            title = info.find("{http://purl.org/net/xbiblio/csl}title")
            # find() returns None when <title> is absent; guard before .text
            if title is not None and title.text:
                return title.text
        return self.style_path
20
+
21
+
22
def get_style_name(bib_stylename):
    """Return the ``name`` of the CSL style called ``bib_stylename``."""
    return EnhancedCitationStyle(bib_stylename).name
pytest.ini ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [pytest]
2
+ addopts = -v
3
+ filterwarnings =
4
+ ignore::UserWarning
5
+ ignore::DeprecationWarning
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ arxiv2bib==1.0.8
2
+ black==21.7b0
3
+ citeproc-py==0.5.1
4
+ citeproc-py-styles==0.1.2
5
+ Flask==2.0.1
6
+ Flask-DebugToolbar==0.11.0
7
+ google==3.0.0
8
+ gunicorn==20.1.0
9
+ json5==0.9.6
10
+ nameparser==1.0.6
11
+ pytest==6.2.5
12
+ requests==2.26.0
13
+ requests-cache==0.8.1
14
+ sentry-sdk==1.3.1
15
+ unidecode==1.3.2
16
+ validators==0.18.2
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.8.12
software.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from citation import get_bib_source_from_dict, citations, reference_manager_exports
2
+ from steps.user_input import UserInputStep
3
+ from steps.exceptions import NoChildrenException
4
+
5
+
6
class Software(object):
    """Resolve a user-supplied identifier into metadata, citations and exports.

    ``find_metadata`` builds ``completed_steps``, a list of step objects that
    starts with a ``UserInputStep`` and grows as children are discovered. The
    remaining members read from that list.
    """

    def __init__(self, user_supplied_id):
        """Store the raw identifier; no lookup happens until ``find_metadata``."""
        self.user_supplied_id = user_supplied_id
        self.completed_steps = []

    def find_metadata(self):
        """Walk the step tree until the last completed step is a metadata step.

        Starting from a ``UserInputStep`` seeded with ``user_supplied_id``, the
        step under the cursor is asked for a child. A returned child is
        appended and becomes the new cursor; ``NoChildrenException`` moves the
        cursor one step back.

        :raises IndexError: when the cursor backs up past the first step,
            i.e. no step could produce a metadata step.
        """
        my_step = UserInputStep()
        my_step.set_content_url(self.user_supplied_id)
        my_step.set_content(self.user_supplied_id)
        self.completed_steps.append(my_step)

        cursor = 0
        while not self.completed_steps[-1].is_metadata:
            # A negative cursor would otherwise wrap around through Python's
            # negative indexing and re-query steps from the end of the list
            # before eventually failing; fail right away with the same
            # exception type and a message that says what happened.
            if cursor < 0:
                raise IndexError(
                    "no metadata step could be reached from the supplied input"
                )
            current_step = self.completed_steps[cursor]

            try:
                next_step = current_step.get_child()
            except NoChildrenException:
                cursor -= 1
            else:
                self.completed_steps.append(next_step)
                cursor = len(self.completed_steps) - 1

    @property
    def name(self):
        """Return the metadata title, or ``display_url`` when there is none.

        A title whose class is named ``MixedString`` is reduced with
        ``.pop()``, as in the original implementation.
        """
        # Read the property once; each access re-runs the metadata assembly.
        metadata = self.metadata
        title = metadata.get("title", "") if metadata else ""
        if title:
            if title.__class__.__name__ == "MixedString":
                return title.pop()
            return title
        return self.display_url

    @property
    def display_url(self):
        """Return the ``content_url`` of the first completed step."""
        return self.completed_steps[0].content_url

    @property
    def metadata(self):
        """Return the last step's content dict, enriched with ``year`` and ``URL``.

        The dict is the step's own ``content`` object and is updated in place:

        * ``year`` is set from ``issued["date-parts"][0][0]`` when ``issued`` is
          present, or to ``""`` when that structure is malformed.
        * ``URL`` is set to the ``url`` of the most recent step that has both a
          truthy ``url`` and truthy ``content``.
        """
        metadata_step = self.completed_steps[-1]
        metadata_dict = metadata_step.content

        issued = metadata_dict.get("issued")
        if issued:
            try:
                year = issued["date-parts"][0][0]
            except (IndexError, KeyError, TypeError):
                # Besides an empty date-parts list, tolerate a missing
                # "date-parts" key or an ``issued`` value that cannot be indexed.
                year = ""
            metadata_dict["year"] = year

        for step in reversed(self.completed_steps):
            if step.url and step.content:
                metadata_dict["URL"] = step.url
                break

        return metadata_dict

    def get_provenance(self):
        """Return ``to_dict()`` of every completed step, in order."""
        return [s.to_dict() for s in self.completed_steps]

    @property
    def citation_plain(self):
        """Return the ``harvard1`` citation from ``to_dict()``, or ``None`` if absent."""
        # Named differently from the imported ``citations`` function to avoid
        # shadowing it inside this method.
        citation_entries = self.to_dict()["citations"]
        return next(
            (
                i["citation"]
                for i in citation_entries
                if i["style_shortname"] == "harvard1"
            ),
            None,
        )

    def to_dict(self):
        """Return url, name, citations, exports, metadata and provenance as a dict."""
        # ``self.metadata`` returns the same dict object on every access, so it
        # is resolved once and reused.
        metadata = self.metadata
        bibtex_metadata = get_bib_source_from_dict(metadata)

        ret = {
            "url": self.display_url,
            "name": self.name,
            "citations": citations(bibtex_metadata),
            "exports": reference_manager_exports(metadata),
            "metadata": metadata,
            "provenance": self.get_provenance(),
        }
        return ret
static/img/badges/babel.png ADDED
static/img/badges/bff.png ADDED
static/img/badges/big_hit.png ADDED
static/img/badges/big_in_japan.png ADDED
static/img/badges/buzz.psd ADDED

Git LFS Details

  • SHA256: fd89cb507fc779f1a553dd743475dfbdc9b017c8d06248d06cf4143bebeec577
  • Pointer size: 132 Bytes
  • Size of remote file: 1.57 MB
static/img/badges/clean_sweep.png ADDED
static/img/badges/controversial.png ADDED
static/img/badges/deep_interest.png ADDED
static/img/badges/depsy.png ADDED
static/img/badges/famous_follower.png ADDED
static/img/badges/first_steps.png ADDED
static/img/badges/fun.psd ADDED

Git LFS Details

  • SHA256: 4e56f037b27a7c98a483d6463284f5d6cb4662be669cc289d6c1e9e6795aed53
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB
static/img/badges/gender_balance.png ADDED
static/img/badges/geo.psd ADDED

Git LFS Details

  • SHA256: 5bf957ee54ca618e8d59b060526212f976064eb59f2d1b1a106d90af69e39eb1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.7 MB
static/img/badges/global_reach.png ADDED
static/img/badges/global_reach_wrong_color.png ADDED
static/img/badges/global_south.png ADDED
static/img/badges/gold_star.png ADDED
static/img/badges/hot_streak.png ADDED
static/img/badges/hot_streak_wrong_color.png ADDED
static/img/badges/impressions.png ADDED
static/img/badges/influence.psd ADDED

Git LFS Details

  • SHA256: e96a7f152eed701ecf66ae3c187425cbbd60dd5e0422591b02c4129f4a53c8c5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.98 MB
static/img/badges/ivory_tower.png ADDED
static/img/badges/megafan.png ADDED
static/img/badges/noun_16718.png ADDED
static/img/badges/oa_advocate.png ADDED
static/img/badges/oa_early_adopter.png ADDED
static/img/badges/open_science_triathlete.png ADDED
static/img/badges/open_sesame.png ADDED
static/img/badges/openness.png ADDED
static/img/badges/openness.psd ADDED

Git LFS Details

  • SHA256: 2db7704a49162290fece2470a7471bd47d9b4b491b0145ee5e1ac70420857952
  • Pointer size: 132 Bytes
  • Size of remote file: 1.87 MB
static/img/badges/reading_level.png ADDED
static/img/badges/rick_roll.png ADDED
static/img/badges/rickroll.png ADDED