Spaces:
Runtime error
Runtime error
Upload 124 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +12 -0
- .gitignore +92 -0
- Dockerfile +7 -0
- LICENSE +22 -0
- Procfile +1 -0
- app.py +42 -0
- bibtex.py +420 -0
- citation.py +232 -0
- data/doaj_issns.json +0 -0
- data/doaj_titles.json +0 -0
- data/vogt.txt +0 -0
- enhanced_citation_style.py +24 -0
- pytest.ini +5 -0
- requirements.txt +16 -0
- runtime.txt +1 -0
- software.py +84 -0
- static/img/badges/babel.png +0 -0
- static/img/badges/bff.png +0 -0
- static/img/badges/big_hit.png +0 -0
- static/img/badges/big_in_japan.png +0 -0
- static/img/badges/buzz.psd +3 -0
- static/img/badges/clean_sweep.png +0 -0
- static/img/badges/controversial.png +0 -0
- static/img/badges/deep_interest.png +0 -0
- static/img/badges/depsy.png +0 -0
- static/img/badges/famous_follower.png +0 -0
- static/img/badges/first_steps.png +0 -0
- static/img/badges/fun.psd +3 -0
- static/img/badges/gender_balance.png +0 -0
- static/img/badges/geo.psd +3 -0
- static/img/badges/global_reach.png +0 -0
- static/img/badges/global_reach_wrong_color.png +0 -0
- static/img/badges/global_south.png +0 -0
- static/img/badges/gold_star.png +0 -0
- static/img/badges/hot_streak.png +0 -0
- static/img/badges/hot_streak_wrong_color.png +0 -0
- static/img/badges/impressions.png +0 -0
- static/img/badges/influence.psd +3 -0
- static/img/badges/ivory_tower.png +0 -0
- static/img/badges/megafan.png +0 -0
- static/img/badges/noun_16718.png +0 -0
- static/img/badges/oa_advocate.png +0 -0
- static/img/badges/oa_early_adopter.png +0 -0
- static/img/badges/open_science_triathlete.png +0 -0
- static/img/badges/open_sesame.png +0 -0
- static/img/badges/openness.png +0 -0
- static/img/badges/openness.psd +3 -0
- static/img/badges/reading_level.png +0 -0
- static/img/badges/rick_roll.png +0 -0
- static/img/badges/rickroll.png +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
static/img/badges/buzz.psd filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
static/img/badges/fun.psd filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
static/img/badges/geo.psd filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
static/img/badges/influence.psd filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
static/img/badges/openness.psd filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
static/img/badges/timeline.psd filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
static/img/gif/orcid-import-scopus-from-nothing.gif filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
static/img/gif/orcid-import-scopus.gif filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
static/img/gif/orcid-set-public.gif filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
static/img/heather.jpg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
static/img/layout[[:space:]]ideas.psd filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
static/img/science.psd filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
|
| 5 |
+
# C extensions
|
| 6 |
+
*.so
|
| 7 |
+
|
| 8 |
+
# Distribution / packaging
|
| 9 |
+
.Python
|
| 10 |
+
env/
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
|
| 14 |
+
# jason commented this one out because breaks static/dist
|
| 15 |
+
# dist/
|
| 16 |
+
|
| 17 |
+
local_data/
|
| 18 |
+
downloads/
|
| 19 |
+
eggs/
|
| 20 |
+
.eggs/
|
| 21 |
+
lib/
|
| 22 |
+
lib64/
|
| 23 |
+
parts/
|
| 24 |
+
sdist/
|
| 25 |
+
var/
|
| 26 |
+
*.egg-info/
|
| 27 |
+
.installed.cfg
|
| 28 |
+
*.egg
|
| 29 |
+
*.temp*
|
| 30 |
+
|
| 31 |
+
# PyInstaller
|
| 32 |
+
# Usually these files are written by a python script from a template
|
| 33 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 34 |
+
*.manifest
|
| 35 |
+
*.spec
|
| 36 |
+
|
| 37 |
+
# Installer logs
|
| 38 |
+
pip-log.txt
|
| 39 |
+
pip-delete-this-directory.txt
|
| 40 |
+
|
| 41 |
+
# Unit test / coverage reports
|
| 42 |
+
htmlcov/
|
| 43 |
+
.tox/
|
| 44 |
+
.coverage
|
| 45 |
+
.coverage.*
|
| 46 |
+
.cache
|
| 47 |
+
nosetests.xml
|
| 48 |
+
coverage.xml
|
| 49 |
+
*,cover
|
| 50 |
+
|
| 51 |
+
# Translations
|
| 52 |
+
*.mo
|
| 53 |
+
*.pot
|
| 54 |
+
|
| 55 |
+
# Django stuff:
|
| 56 |
+
*.log
|
| 57 |
+
|
| 58 |
+
# Sphinx documentation
|
| 59 |
+
docs/_build/
|
| 60 |
+
|
| 61 |
+
# PyBuilder
|
| 62 |
+
target/
|
| 63 |
+
|
| 64 |
+
# venv
|
| 65 |
+
venv/
|
| 66 |
+
|
| 67 |
+
# IDE (added by jason)
|
| 68 |
+
.idea/
|
| 69 |
+
|
| 70 |
+
# sqlite (added by heather)
|
| 71 |
+
*.sqlite
|
| 72 |
+
|
| 73 |
+
# env setup (added by heather)
|
| 74 |
+
tng-env.sh
|
| 75 |
+
|
| 76 |
+
# node modules used in Grunt
|
| 77 |
+
static/node_modules
|
| 78 |
+
|
| 79 |
+
# CodeKit. Cannot seem to make this work :(
|
| 80 |
+
*codekit*
|
| 81 |
+
codekit*
|
| 82 |
+
codekit-config.json
|
| 83 |
+
static/codekit-config.json
|
| 84 |
+
|
| 85 |
+
# for markdown previews
|
| 86 |
+
README_cache
|
| 87 |
+
|
| 88 |
+
# local environment files (added by heather)
|
| 89 |
+
*.env
|
| 90 |
+
|
| 91 |
+
# raw doaj files (added by heather)
|
| 92 |
+
data/doaj_20*.csv
|
Dockerfile
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.8-slim-buster
|
| 2 |
+
WORKDIR /app
|
| 3 |
+
COPY requirements.txt requirements.txt
|
| 4 |
+
RUN pip install -r requirements.txt
|
| 5 |
+
COPY . .
|
| 6 |
+
EXPOSE 8000
|
| 7 |
+
CMD ["gunicorn" , "--bind", "0.0.0.0:8000", "views:app", "-w", "3"]
|
LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
The MIT License (MIT)
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2021 OurResearch
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
| 22 |
+
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: gunicorn views:app -w 3 --timeout 60 --reload
|
app.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
from flask import Flask
|
| 6 |
+
import requests
|
| 7 |
+
import requests_cache
|
| 8 |
+
import sentry_sdk
|
| 9 |
+
from sentry_sdk.integrations.flask import FlaskIntegration
|
| 10 |
+
|
| 11 |
+
requests_cache.install_cache(
|
| 12 |
+
"my_requests_cache", expire_after=60 * 60 * 24 * 1
|
| 13 |
+
) # expire_after is in seconds
|
| 14 |
+
requests_cache.clear()
|
| 15 |
+
|
| 16 |
+
# set up logging
|
| 17 |
+
# see http://wiki.pylonshq.com/display/pylonscookbook/Alternative+logging+configuration
|
| 18 |
+
logging.basicConfig(
|
| 19 |
+
stream=sys.stdout, level=logging.DEBUG, format="%(name)s - %(message)s"
|
| 20 |
+
)
|
| 21 |
+
logger = logging.getLogger("citeas")
|
| 22 |
+
|
| 23 |
+
libraries_to_mum = [
|
| 24 |
+
"requests.packages.urllib3",
|
| 25 |
+
"requests.packages.urllib3.connectionpool",
|
| 26 |
+
"requests_oauthlib",
|
| 27 |
+
"urllib3.connectionpool",
|
| 28 |
+
"oauthlib",
|
| 29 |
+
"citeproc",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
for a_library in libraries_to_mum:
|
| 33 |
+
the_logger = logging.getLogger(a_library)
|
| 34 |
+
the_logger.setLevel(logging.WARNING)
|
| 35 |
+
the_logger.propagate = True
|
| 36 |
+
|
| 37 |
+
requests.packages.urllib3.disable_warnings()
|
| 38 |
+
|
| 39 |
+
# error reporting with sentry
|
| 40 |
+
sentry_sdk.init(dsn=os.environ.get("SENTRY_DSN"), integrations=[FlaskIntegration()])
|
| 41 |
+
|
| 42 |
+
app = Flask(__name__)
|
bibtex.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
from citeproc.py2compat import *
|
| 5 |
+
|
| 6 |
+
# copied from https://github.com/brechtm/citeproc-py/blob/master/citeproc/source/bibtex/bibtex.py
|
| 7 |
+
# then modified to fix bugs. search for "hap" to see mods
|
| 8 |
+
|
| 9 |
+
import re
|
| 10 |
+
import unicodedata
|
| 11 |
+
|
| 12 |
+
from warnings import warn
|
| 13 |
+
|
| 14 |
+
from citeproc.types import (
|
| 15 |
+
ARTICLE,
|
| 16 |
+
ARTICLE_JOURNAL,
|
| 17 |
+
BOOK,
|
| 18 |
+
CHAPTER,
|
| 19 |
+
MANUSCRIPT,
|
| 20 |
+
PAMPHLET,
|
| 21 |
+
PAPER_CONFERENCE,
|
| 22 |
+
REPORT,
|
| 23 |
+
THESIS,
|
| 24 |
+
)
|
| 25 |
+
from citeproc.string import String, MixedString, NoCase
|
| 26 |
+
from citeproc.source import BibliographySource, Reference, Name, Date, DateRange
|
| 27 |
+
from citeproc.source.bibtex.bibparse import BibTeXParser
|
| 28 |
+
from citeproc.source.bibtex.latex import parse_latex
|
| 29 |
+
from citeproc.source.bibtex.latex.macro import NewCommand, Macro
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class BibTeX(BibliographySource):
|
| 33 |
+
fields = {
|
| 34 |
+
"address": "publisher_place",
|
| 35 |
+
"annote": "annote",
|
| 36 |
+
"author": "author",
|
| 37 |
+
"booktitle": "container_title",
|
| 38 |
+
"chapter": "chapter_number",
|
| 39 |
+
"edition": "edition",
|
| 40 |
+
"editor": "editor",
|
| 41 |
+
# 'howpublished': None,
|
| 42 |
+
# 'institution': None,
|
| 43 |
+
"journal": "container_title",
|
| 44 |
+
# 'month': None,
|
| 45 |
+
"note": "note",
|
| 46 |
+
"number": "issue",
|
| 47 |
+
# 'organization': None,
|
| 48 |
+
"pages": "page",
|
| 49 |
+
"publisher": "publisher",
|
| 50 |
+
# 'school': None,
|
| 51 |
+
"series": "collection_title",
|
| 52 |
+
"title": "title",
|
| 53 |
+
# 'type': None,
|
| 54 |
+
# 'year': None,
|
| 55 |
+
"volume": "volume",
|
| 56 |
+
# hap added doi and url
|
| 57 |
+
"doi": "doi",
|
| 58 |
+
"url": "url",
|
| 59 |
+
# non-standard fields
|
| 60 |
+
"isbn": "ISBN",
|
| 61 |
+
"issn": "ISSN",
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
types = { # standard entry types
|
| 65 |
+
"article": ARTICLE_JOURNAL,
|
| 66 |
+
"book": BOOK,
|
| 67 |
+
"booklet": PAMPHLET,
|
| 68 |
+
"conference": PAPER_CONFERENCE,
|
| 69 |
+
"inbook": CHAPTER,
|
| 70 |
+
"incollection": ARTICLE_JOURNAL,
|
| 71 |
+
"inproceedings": PAPER_CONFERENCE,
|
| 72 |
+
"manual": BOOK,
|
| 73 |
+
"mastersthesis": THESIS,
|
| 74 |
+
"misc": ARTICLE,
|
| 75 |
+
"phdthesis": THESIS,
|
| 76 |
+
"proceedings": BOOK,
|
| 77 |
+
"techreport": REPORT,
|
| 78 |
+
"unpublished": MANUSCRIPT,
|
| 79 |
+
# non-standard entry types
|
| 80 |
+
"thesis": THESIS,
|
| 81 |
+
"report": REPORT,
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
def __init__(self, filename, encoding="ascii"):
|
| 85 |
+
bibtex_database = BibTeXParser(filename)
|
| 86 |
+
bibtex_database.encoding = encoding
|
| 87 |
+
self.preamble_macros = {}
|
| 88 |
+
parse_latex(
|
| 89 |
+
bibtex_database.preamble,
|
| 90 |
+
{
|
| 91 |
+
"newcommand": NewCommand(self.preamble_macros),
|
| 92 |
+
"mbox": Macro(1, "{0}"),
|
| 93 |
+
"cite": Macro(1, "CITE({0})"),
|
| 94 |
+
},
|
| 95 |
+
)
|
| 96 |
+
for key, entry in bibtex_database.items():
|
| 97 |
+
self.add(self.create_reference(key, entry))
|
| 98 |
+
|
| 99 |
+
def _bibtex_to_csl(self, bibtex_entry):
|
| 100 |
+
csl_dict = {}
|
| 101 |
+
for field, value in bibtex_entry.items():
|
| 102 |
+
try:
|
| 103 |
+
value = value.strip()
|
| 104 |
+
except AttributeError:
|
| 105 |
+
pass
|
| 106 |
+
|
| 107 |
+
try:
|
| 108 |
+
csl_field = self.fields[field]
|
| 109 |
+
except KeyError:
|
| 110 |
+
csl_field = field
|
| 111 |
+
|
| 112 |
+
if field in ("number", "volume"):
|
| 113 |
+
try:
|
| 114 |
+
value = int(value)
|
| 115 |
+
except ValueError:
|
| 116 |
+
pass
|
| 117 |
+
elif field == "pages":
|
| 118 |
+
value = self._bibtex_to_csl_pages(value)
|
| 119 |
+
elif field in ("author", "editor"):
|
| 120 |
+
try:
|
| 121 |
+
value = [name for name in self._parse_author(value)]
|
| 122 |
+
except RuntimeError:
|
| 123 |
+
pass
|
| 124 |
+
else:
|
| 125 |
+
try:
|
| 126 |
+
value = self._parse_string(value)
|
| 127 |
+
except TypeError:
|
| 128 |
+
value = str(value)
|
| 129 |
+
|
| 130 |
+
csl_dict[csl_field] = value
|
| 131 |
+
# print("csl_dict: {}".format(csl_dict))
|
| 132 |
+
return csl_dict
|
| 133 |
+
|
| 134 |
+
@staticmethod
|
| 135 |
+
def _bibtex_to_csl_pages(value):
|
| 136 |
+
value = value.replace(" ", "")
|
| 137 |
+
if "-" in value:
|
| 138 |
+
try:
|
| 139 |
+
first, last = value.split("--")
|
| 140 |
+
except ValueError:
|
| 141 |
+
first, last = value.split("-")
|
| 142 |
+
pages = "-".join((first, last))
|
| 143 |
+
else:
|
| 144 |
+
pages = value[:-1] if value.endswith("+") else value
|
| 145 |
+
return pages
|
| 146 |
+
|
| 147 |
+
def _bibtex_to_csl_date(self, bibtex_entry):
|
| 148 |
+
# hap commented out the month section on feb 18, 2017 because was causing bugs,
|
| 149 |
+
# if 'month' in bibtex_entry:
|
| 150 |
+
# begin_dict, end_dict = self._parse_month(bibtex_entry['month'])
|
| 151 |
+
# else:
|
| 152 |
+
# begin_dict, end_dict = {}, {}
|
| 153 |
+
|
| 154 |
+
# hap replaced section above with this, ignoring the month.
|
| 155 |
+
begin_dict, end_dict = {}, {}
|
| 156 |
+
|
| 157 |
+
if "year" in bibtex_entry:
|
| 158 |
+
begin_dict["year"], end_dict["year"] = self._parse_year(
|
| 159 |
+
bibtex_entry["year"]
|
| 160 |
+
)
|
| 161 |
+
if not begin_dict:
|
| 162 |
+
return None
|
| 163 |
+
if begin_dict == end_dict:
|
| 164 |
+
return Date(**begin_dict)
|
| 165 |
+
else:
|
| 166 |
+
return DateRange(begin=Date(**begin_dict), end=Date(**end_dict))
|
| 167 |
+
|
| 168 |
+
def _parse_year(self, year):
|
| 169 |
+
try:
|
| 170 |
+
year_str = parse_latex(year, self.preamble_macros)
|
| 171 |
+
except TypeError:
|
| 172 |
+
year_str = str(year)
|
| 173 |
+
if EN_DASH in year_str:
|
| 174 |
+
begin_year, end_year = year_str.split(EN_DASH)
|
| 175 |
+
begin_len, end_len = len(begin_year), len(end_year)
|
| 176 |
+
if end_len < begin_len:
|
| 177 |
+
end_year = begin_year[: begin_len - end_len] + end_year
|
| 178 |
+
else:
|
| 179 |
+
begin_year = end_year = int(year_str)
|
| 180 |
+
return begin_year, end_year
|
| 181 |
+
|
| 182 |
+
MONTHS = (
|
| 183 |
+
"jan",
|
| 184 |
+
"feb",
|
| 185 |
+
"mar",
|
| 186 |
+
"apr",
|
| 187 |
+
"may",
|
| 188 |
+
"jun",
|
| 189 |
+
"jul",
|
| 190 |
+
"aug",
|
| 191 |
+
"sep",
|
| 192 |
+
"oct",
|
| 193 |
+
"nov",
|
| 194 |
+
"dec",
|
| 195 |
+
)
|
| 196 |
+
RE_DAY = "(?P<day>\d+)"
|
| 197 |
+
RE_MONTH = "(?P<month>\w+)"
|
| 198 |
+
|
| 199 |
+
@staticmethod
|
| 200 |
+
def _parse_month(month):
|
| 201 |
+
def month_name_to_index(name):
|
| 202 |
+
try:
|
| 203 |
+
return BibTeX.MONTHS.index(name[:3].lower()) + 1
|
| 204 |
+
except ValueError:
|
| 205 |
+
return int(name)
|
| 206 |
+
|
| 207 |
+
begin = {}
|
| 208 |
+
end = {}
|
| 209 |
+
month = month.strip()
|
| 210 |
+
month = month.replace(", ", "-")
|
| 211 |
+
if month.isdecimal():
|
| 212 |
+
begin["month"] = end["month"] = month
|
| 213 |
+
elif month.replace("-", "").isalpha():
|
| 214 |
+
if "-" in month:
|
| 215 |
+
begin["month"], end["month"] = month.split("-")
|
| 216 |
+
else:
|
| 217 |
+
begin["month"] = end["month"] = month
|
| 218 |
+
else:
|
| 219 |
+
m = re.match(BibTeX.RE_DAY + "[ ~]*" + BibTeX.RE_MONTH, month)
|
| 220 |
+
if m is None:
|
| 221 |
+
m = re.match(BibTeX.RE_MONTH + "[ ~]*" + BibTeX.RE_DAY, month)
|
| 222 |
+
begin["day"] = end["day"] = int(m.group("day"))
|
| 223 |
+
begin["month"] = end["month"] = m.group("month")
|
| 224 |
+
begin["month"] = month_name_to_index(begin["month"])
|
| 225 |
+
end["month"] = month_name_to_index(end["month"])
|
| 226 |
+
return begin, end
|
| 227 |
+
|
| 228 |
+
def _parse_string(self, title):
|
| 229 |
+
def make_string(string, top_level_group=False):
|
| 230 |
+
unlatexed = parse_latex(string, self.preamble_macros)
|
| 231 |
+
fixed_case = top_level_group and not string.startswith("\\")
|
| 232 |
+
string_cls = NoCase if fixed_case else String
|
| 233 |
+
return string_cls(unlatexed)
|
| 234 |
+
|
| 235 |
+
title = str(title)
|
| 236 |
+
title = title.replace("\n", "")
|
| 237 |
+
title = " ".join(title.split())
|
| 238 |
+
|
| 239 |
+
output = MixedString()
|
| 240 |
+
level = 0
|
| 241 |
+
string = ""
|
| 242 |
+
for char in title:
|
| 243 |
+
if char == "{":
|
| 244 |
+
if level == 0:
|
| 245 |
+
if string:
|
| 246 |
+
output += make_string(string)
|
| 247 |
+
string = ""
|
| 248 |
+
level += 1
|
| 249 |
+
elif char == "}":
|
| 250 |
+
level -= 1
|
| 251 |
+
if level == 0:
|
| 252 |
+
output += make_string(string, True)
|
| 253 |
+
string = ""
|
| 254 |
+
else:
|
| 255 |
+
string += char
|
| 256 |
+
if level != 0:
|
| 257 |
+
raise SyntaxError('Non-matching braces in "{}"'.format(title))
|
| 258 |
+
if string:
|
| 259 |
+
output += make_string(string)
|
| 260 |
+
return output
|
| 261 |
+
|
| 262 |
+
def _parse_author(self, authors):
|
| 263 |
+
csl_authors = []
|
| 264 |
+
for author in split_names(authors):
|
| 265 |
+
first, von, last, jr = parse_name(author)
|
| 266 |
+
csl_parts = {}
|
| 267 |
+
for part, csl_label in [
|
| 268 |
+
(first, "given"),
|
| 269 |
+
(von, "non-dropping-particle"),
|
| 270 |
+
(last, "family"),
|
| 271 |
+
(jr, "suffix"),
|
| 272 |
+
]:
|
| 273 |
+
if part is not None:
|
| 274 |
+
csl_parts[csl_label] = parse_latex(part, self.preamble_macros)
|
| 275 |
+
name = Name(**csl_parts)
|
| 276 |
+
csl_authors.append(name)
|
| 277 |
+
return csl_authors
|
| 278 |
+
|
| 279 |
+
def create_reference(self, key, bibtex_entry):
|
| 280 |
+
csl_type = self.types[bibtex_entry.document_type]
|
| 281 |
+
csl_fields = self._bibtex_to_csl(bibtex_entry)
|
| 282 |
+
csl_date = self._bibtex_to_csl_date(bibtex_entry)
|
| 283 |
+
if csl_date:
|
| 284 |
+
csl_fields["issued"] = csl_date
|
| 285 |
+
ref = Reference(key, csl_type, **csl_fields)
|
| 286 |
+
return ref
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
# BibTeX name handling
|
| 290 |
+
#
|
| 291 |
+
# references
|
| 292 |
+
# - BibTeXing by Oren Patashnik (Feb 8, 1988), 4. Helpful Hints, item 18
|
| 293 |
+
# (BibTeX 0.99d - http://www.ctan.org/tex-archive/biblio/bibtex/base/btxdoc.pdf)
|
| 294 |
+
# - A summary of BibTex by Xavier Décoret
|
| 295 |
+
# (http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html)
|
| 296 |
+
# - Tame the BeaST by Nicolas Markey
|
| 297 |
+
# (http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf)
|
| 298 |
+
|
| 299 |
+
AND = " and "
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def split_names(string):
|
| 303 |
+
"""Split a string of names separated by 'and' into a list of names."""
|
| 304 |
+
brace_level = 0
|
| 305 |
+
names = []
|
| 306 |
+
last_index = 0
|
| 307 |
+
for i in range(len(string)):
|
| 308 |
+
char = string[i]
|
| 309 |
+
if brace_level == 0 and string[i:].startswith(AND):
|
| 310 |
+
names.append(string[last_index:i])
|
| 311 |
+
last_index = i + len(AND)
|
| 312 |
+
elif char == "{":
|
| 313 |
+
brace_level += 1
|
| 314 |
+
elif char == "}":
|
| 315 |
+
brace_level -= 1
|
| 316 |
+
last_name = string[last_index:]
|
| 317 |
+
if last_name:
|
| 318 |
+
names.append(last_name)
|
| 319 |
+
return names
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def parse_name(name):
|
| 323 |
+
"""Parse a BibTeX name string and split it into First, von, Last and Jr
|
| 324 |
+
parts.
|
| 325 |
+
"""
|
| 326 |
+
parts = split_name(name)
|
| 327 |
+
if len(parts) == 1: # First von Last
|
| 328 |
+
(first_von_last,) = parts
|
| 329 |
+
index = 0
|
| 330 |
+
first, jr = [], []
|
| 331 |
+
for word in first_von_last[:-1]:
|
| 332 |
+
if is_capitalized(word) not in (True, None):
|
| 333 |
+
break
|
| 334 |
+
first.append(word)
|
| 335 |
+
index += 1
|
| 336 |
+
von_last = first_von_last[index:]
|
| 337 |
+
elif len(parts) == 2: # von Last, First
|
| 338 |
+
jr = []
|
| 339 |
+
von_last, first = parts
|
| 340 |
+
elif len(parts) == 3: # von Last, Jr, First
|
| 341 |
+
von_last, jr, first = parts
|
| 342 |
+
von, last = split_von_last(von_last)
|
| 343 |
+
join = " ".join
|
| 344 |
+
return join(first) or None, join(von) or None, join(last), join(jr) or None
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
def split_name(name):
|
| 348 |
+
"""Split a name in into parts delimited by commas (at brace-level 0), and
|
| 349 |
+
each part into words.
|
| 350 |
+
|
| 351 |
+
Returns a list of of lists of words.
|
| 352 |
+
"""
|
| 353 |
+
brace_level = 0
|
| 354 |
+
parts = []
|
| 355 |
+
current_part = []
|
| 356 |
+
word = ""
|
| 357 |
+
for char in name:
|
| 358 |
+
if char in " \t,":
|
| 359 |
+
if brace_level == 0:
|
| 360 |
+
if word:
|
| 361 |
+
current_part.append(word)
|
| 362 |
+
word = ""
|
| 363 |
+
if char == ",":
|
| 364 |
+
parts.append(current_part)
|
| 365 |
+
current_part = []
|
| 366 |
+
continue
|
| 367 |
+
elif char == "{":
|
| 368 |
+
brace_level += 1
|
| 369 |
+
elif char == "}":
|
| 370 |
+
brace_level -= 1
|
| 371 |
+
word += char
|
| 372 |
+
if word:
|
| 373 |
+
current_part.append(word)
|
| 374 |
+
parts.append(current_part)
|
| 375 |
+
return parts
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
def is_capitalized(string):
|
| 379 |
+
"""Check if a BibTeX substring is capitalized.
|
| 380 |
+
|
| 381 |
+
A string can be "case-less", in which case `None` is returned.
|
| 382 |
+
"""
|
| 383 |
+
brace_level = 0
|
| 384 |
+
special_char = False
|
| 385 |
+
for char, next_char in lookahead_iter(string):
|
| 386 |
+
if (brace_level == 0 or special_char) and char.isalpha():
|
| 387 |
+
return char.isupper()
|
| 388 |
+
elif char == "{":
|
| 389 |
+
brace_level += 1
|
| 390 |
+
if brace_level == 1 and next_char == "\\":
|
| 391 |
+
special_char = True
|
| 392 |
+
elif char == "}":
|
| 393 |
+
brace_level -= 1
|
| 394 |
+
if brace_level == 0:
|
| 395 |
+
special_char = False
|
| 396 |
+
return None # case-less
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def split_von_last(words):
|
| 400 |
+
"""Split "von Last" name into von and Last parts."""
|
| 401 |
+
if len(words) > 1 and is_capitalized(words[0]) is False:
|
| 402 |
+
for j, word in enumerate(reversed(words[:-1])):
|
| 403 |
+
if is_capitalized(word) not in (True, None):
|
| 404 |
+
return words[: -j - 1], words[-j - 1 :]
|
| 405 |
+
return [], words
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def lookahead_iter(iterable):
|
| 409 |
+
"""Iterator that also yields the next item along with each item. The next
|
| 410 |
+
item is `None` when yielding the last item.
|
| 411 |
+
"""
|
| 412 |
+
items = iter(iterable)
|
| 413 |
+
item = next(items)
|
| 414 |
+
for next_item in items:
|
| 415 |
+
yield item, next_item
|
| 416 |
+
item = next_item
|
| 417 |
+
yield item, None
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
EN_DASH = unicodedata.lookup("EN DASH")
|
citation.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import html
|
| 2 |
+
|
| 3 |
+
from citeproc import formatter, CitationStylesBibliography, Citation, CitationItem
|
| 4 |
+
from citeproc.source.json import CiteProcJSON
|
| 5 |
+
|
| 6 |
+
from enhanced_citation_style import EnhancedCitationStyle, get_style_name
|
| 7 |
+
from steps.utils import author_name_as_dict
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def display_citation(bibtex_metadata, bib_stylename, formatter=formatter.html):
|
| 11 |
+
# valid style names: plos, apa, pnas, nature, bmj, harvard1
|
| 12 |
+
# full list is here: https://github.com/citation-style-language/styles
|
| 13 |
+
|
| 14 |
+
bib_style = EnhancedCitationStyle(bib_stylename)
|
| 15 |
+
bibliography = CitationStylesBibliography(
|
| 16 |
+
bib_style, bibtex_metadata, formatter
|
| 17 |
+
) # could be formatter.html
|
| 18 |
+
citation = Citation([CitationItem("ITEM-1")])
|
| 19 |
+
bibliography.register(citation)
|
| 20 |
+
|
| 21 |
+
citation_parts = "".join(bibliography.bibliography()[0])
|
| 22 |
+
citation_text = "".join(citation_parts)
|
| 23 |
+
|
| 24 |
+
if bib_stylename == "apa":
|
| 25 |
+
# strip extra periods and spaces that can occur in APA format
|
| 26 |
+
citation_text = citation_text.replace("..", ".")
|
| 27 |
+
citation_text = citation_text.replace(" ", " ")
|
| 28 |
+
|
| 29 |
+
citation_text = citation_text.strip()
|
| 30 |
+
|
| 31 |
+
# strip leading comma
|
| 32 |
+
if citation_text.startswith(","):
|
| 33 |
+
citation_text = citation_text.lstrip(",").strip()
|
| 34 |
+
|
| 35 |
+
citation_text = strip_duplicate_apa_title(bibtex_metadata, citation_text)
|
| 36 |
+
|
| 37 |
+
citation_text = html.unescape(citation_text)
|
| 38 |
+
return citation_text
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_author_list(data_author):
|
| 42 |
+
author_list = []
|
| 43 |
+
for name_dict in data_author:
|
| 44 |
+
new_name_dict = {}
|
| 45 |
+
if "family" not in name_dict:
|
| 46 |
+
if "name" in name_dict:
|
| 47 |
+
new_name_dict["family"] = name_dict["name"]
|
| 48 |
+
else:
|
| 49 |
+
new_name_dict["family"] = ""
|
| 50 |
+
for name_k, name_v in name_dict.items():
|
| 51 |
+
if name_k == "literal":
|
| 52 |
+
new_name_dict = author_name_as_dict(name_v)
|
| 53 |
+
else:
|
| 54 |
+
new_name_dict[name_k] = name_v
|
| 55 |
+
author_list.append(new_name_dict)
|
| 56 |
+
return author_list
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def build_bibtex_author_list(authors):
|
| 60 |
+
author_list = ""
|
| 61 |
+
for i, author in enumerate(authors):
|
| 62 |
+
if i > 0:
|
| 63 |
+
author_list += " and "
|
| 64 |
+
|
| 65 |
+
if author.get("family"):
|
| 66 |
+
author_list += author.get("family")
|
| 67 |
+
|
| 68 |
+
if author.get("given"):
|
| 69 |
+
author_list += ", " + author.get("given")
|
| 70 |
+
|
| 71 |
+
return author_list
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def bibtex_pages_format(pages):
|
| 75 |
+
return pages.replace("-", "--")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def get_bib_source_from_dict(data):
|
| 79 |
+
data["id"] = "ITEM-1"
|
| 80 |
+
|
| 81 |
+
if "author" in data:
|
| 82 |
+
data["author"] = get_author_list(data["author"])
|
| 83 |
+
|
| 84 |
+
if "type" not in data:
|
| 85 |
+
data["type"] = "misc"
|
| 86 |
+
|
| 87 |
+
if data["type"] != "software":
|
| 88 |
+
for k, val in data.items():
|
| 89 |
+
if val and (k in ["title", "container-title"]):
|
| 90 |
+
num_upper = sum([1 for c in val if c.isupper()])
|
| 91 |
+
if num_upper > 0.75 * len(val):
|
| 92 |
+
data[k] = val.title()
|
| 93 |
+
|
| 94 |
+
if "page" in data and data["page"] == "-":
|
| 95 |
+
del data["page"]
|
| 96 |
+
|
| 97 |
+
if "bibtex" in data:
|
| 98 |
+
del data["bibtex"]
|
| 99 |
+
|
| 100 |
+
if "issued" in data:
|
| 101 |
+
if data["issued"]["date-parts"][0][0] is None:
|
| 102 |
+
del data["issued"]
|
| 103 |
+
|
| 104 |
+
bib_source = CiteProcJSON([data])
|
| 105 |
+
|
| 106 |
+
return bib_source
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def strip_duplicate_apa_title(bibtex_metadata, citation_text):
    """Remove a doubled title from an APA-style citation string.

    Some sources repeat the title; when it appears exactly twice, drop the
    first occurrence and tidy up any leading period/whitespace left behind.
    GitHub-derived citations are left untouched.
    """
    title = bibtex_metadata.get("item-1").get("title")
    if not title or "Retrieved from https://github.com" in citation_text:
        return citation_text

    # Title may be a citeproc MixedString; flatten it to a plain str.
    title = "".join(title).replace(" ", " ")
    if citation_text.count(title) == 2:
        citation_text = citation_text.replace(title, "", 1)
        if citation_text.startswith("."):
            citation_text = citation_text.replace(".", "", 1)
        citation_text = citation_text.lstrip()
    return citation_text
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def citations(bibtex_metadata):
    """Render the metadata in several common citation styles.

    Returns a list of dicts, each with the style shortname, the rendered
    citation text, and the style's human-readable full name.
    """
    # Full list of possible citation formats is here:
    # https://github.com/citation-style-language/styles
    style_shortnames = (
        "apa",
        "harvard1",
        "nature",
        "modern-language-association-with-url",
        "chicago-author-date",
        "vancouver",
    )
    return [
        {
            "style_shortname": shortname,
            "citation": display_citation(bibtex_metadata, shortname),
            "style_fullname": get_style_name(shortname),
        }
        for shortname in style_shortnames
    ]
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def export_contents(export_type, metadata_dict):
    """Serialize citation metadata into a reference-manager export format.

    Args:
        export_type: one of "csv", "ris", "enw", "bibtex".
        metadata_dict: CSL-style metadata dict (title, container-title,
            volume, issue, page, year, publisher, author, ...).

    Returns:
        The export file contents as a string, or None for an unknown type.
    """
    if export_type == "csv":
        return _export_csv(metadata_dict)
    elif export_type == "ris":
        return _export_ris(metadata_dict)
    elif export_type == "enw":
        return _export_enw(metadata_dict)
    elif export_type == "bibtex":
        return _export_bibtex(metadata_dict)
    return None


def _author_name_rows(tag, metadata_dict):
    # One (tag, "Family, Given") tuple per author.
    return [
        (tag, ", ".join([author.get("family", ""), author.get("given", "")]))
        for author in metadata_dict.get("author", [])
    ]


def _export_csv(metadata_dict):
    # One header row and one value row.
    # NOTE(review): values are not comma-escaped, so embedded commas will
    # shift columns; kept as-is to preserve the existing output format.
    items = list(metadata_dict.items())
    header_row = ",".join([name for (name, value) in items])
    try:
        value_row = ",".join([str(value) for (name, value) in items])
    except UnicodeEncodeError:
        value_row = ""
    return "{}\n{}".format(header_row, value_row)


def _export_ris(metadata_dict):
    # RIS tagged format: "TAG  - value" lines, terminated by "ER  - ".
    response_list = [
        ("TY", "JOUR"),
        ("T1", metadata_dict.get("title", "")),
        ("JO", metadata_dict.get("container-title", "")),
        ("VL", metadata_dict.get("volume", "")),
        ("IS", metadata_dict.get("issue", "")),
        ("SP", metadata_dict.get("page", "")),
        ("V1", metadata_dict.get("year", "")),
        ("PB", metadata_dict.get("publisher", "")),
    ]
    response_list.extend(_author_name_rows("A1", metadata_dict))
    response = "\n".join("{} - {}".format(k, v) for (k, v) in response_list)
    response += "\nER - "
    return response


def _export_enw(metadata_dict):
    # EndNote tagged format: "%TAG value" lines.
    response_list = [
        ("%T", metadata_dict.get("title", "")),
        ("%J", metadata_dict.get("container-title", "")),
        ("%V", metadata_dict.get("volume", "")),
        ("%N", metadata_dict.get("issue", "")),
        ("%P", metadata_dict.get("page", "")),
        ("%D", metadata_dict.get("year", "")),
        ("%I", metadata_dict.get("publisher", "")),
        # Bug fix: the EndNote record-type tag is "%0", not "0%".
        ("%0", "Journal Article"),
    ]
    response_list.extend(_author_name_rows("%A", metadata_dict))
    return "\n".join("{} {}".format(k, v) for (k, v) in response_list)


def _export_bibtex(metadata_dict):
    # BibTeX entry; the entry type falls back to @article when unknown.
    if metadata_dict.get("type"):
        response = "@" + metadata_dict.get("type") + "{ITEM1, "
    else:
        response = "@article{ITEM1, "

    response_list = [("title", metadata_dict.get("title", ""))]

    # Books carry an ISBN; software carries a URL plus journal-ish fields;
    # everything else is treated as an article.
    if metadata_dict.get("type") == "book":
        response_list.append(("isbn", metadata_dict.get("isbn", "")))
    elif metadata_dict.get("type") == "software":
        response_list.append(("url", metadata_dict.get("URL", "")))
        response_list.append(("journal", metadata_dict.get("container-title", "")))
        response_list.append(("volume", metadata_dict.get("volume", "")))
        response_list.append(("number", metadata_dict.get("number", "")))
    else:
        response_list.append(("journal", metadata_dict.get("container-title", "")))
        response_list.append(("volume", metadata_dict.get("volume", "")))
        response_list.append(("number", metadata_dict.get("number", "")))

    response_list.append(
        ("pages", bibtex_pages_format(metadata_dict.get("page", "")))
    )
    response_list.append(("year", metadata_dict.get("year", "")))
    response_list.append(("publisher", metadata_dict.get("publisher", "")))
    response_list.append(
        ("author", build_bibtex_author_list(metadata_dict.get("author", [])))
    )

    response += ",\n".join("{}={{{}}}".format(k, v) for (k, v) in response_list)
    response += "}"
    return response
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def reference_manager_exports(metadata_dict):
    """Build export payloads for every supported reference-manager format."""
    return [
        {
            "export_name": export_name,
            "export": export_contents(export_name, metadata_dict),
        }
        for export_name in ("csv", "enw", "ris", "bibtex")
    ]
|
data/doaj_issns.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/doaj_titles.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/vogt.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
enhanced_citation_style.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from citeproc import CitationStylesStyle
|
| 2 |
+
from citeproc_styles import get_style_filepath
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class EnhancedCitationStyle(CitationStylesStyle):
    """A CitationStylesStyle that remembers its CSL file path and exposes
    the style's human-readable title via the ``name`` property."""

    def __init__(self, bib_stylename):
        # valid style names: plos, apa, pnas, nature, bmj, harvard1
        # full list is here: https://github.com/citation-style-language/styles
        self.style_path = get_style_filepath(bib_stylename)
        super(EnhancedCitationStyle, self).__init__(self.style_path, validate=False)

    @property
    def name(self):
        """Human-readable style title from the CSL <info> element, falling
        back to the style file path when <info> is absent."""
        # self.root is the parsed CSL XML root provided by CitationStylesStyle.
        info = self.root.find("{http://purl.org/net/xbiblio/csl}info")
        if info is not None:
            title = info.find("{http://purl.org/net/xbiblio/csl}title")
            # NOTE(review): assumes <title> exists whenever <info> does; a
            # missing <title> would raise AttributeError — confirm CSL inputs.
            return title.text
        return self.style_path
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def get_style_name(bib_stylename):
    """Look up the human-readable name for a CSL style shortname."""
    return EnhancedCitationStyle(bib_stylename).name
|
pytest.ini
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[pytest]
|
| 2 |
+
addopts = -v
|
| 3 |
+
filterwarnings =
|
| 4 |
+
ignore::UserWarning
|
| 5 |
+
ignore::DeprecationWarning
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
arxiv2bib==1.0.8
|
| 2 |
+
black==21.7b0
|
| 3 |
+
citeproc-py==0.5.1
|
| 4 |
+
citeproc-py-styles==0.1.2
|
| 5 |
+
Flask==2.0.1
|
| 6 |
+
Flask-DebugToolbar==0.11.0
|
| 7 |
+
google==3.0.0
|
| 8 |
+
gunicorn==20.1.0
|
| 9 |
+
json5==0.9.6
|
| 10 |
+
nameparser==1.0.6
|
| 11 |
+
pytest==6.2.5
|
| 12 |
+
requests==2.26.0
|
| 13 |
+
requests-cache==0.8.1
|
| 14 |
+
sentry-sdk==1.3.1
|
| 15 |
+
unidecode==1.3.2
|
| 16 |
+
validators==0.18.2
|
runtime.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
python-3.8.12
|
software.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from citation import get_bib_source_from_dict, citations, reference_manager_exports
|
| 2 |
+
from steps.user_input import UserInputStep
|
| 3 |
+
from steps.exceptions import NoChildrenException
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Software(object):
    """A software artifact identified by a user-supplied URL/ID, resolved to
    citation metadata by walking a chain of resolution steps."""

    def __init__(self, user_supplied_id):
        # The raw URL or identifier the user typed in.
        self.user_supplied_id = user_supplied_id
        # Resolution steps tried so far; the last one may hold final metadata.
        self.completed_steps = []

    def find_metadata(self):
        """Run resolution steps until one yields metadata.

        Depth-first walk: repeatedly ask the step at ``cursor`` for a child
        step; when a step raises NoChildrenException, back up one step.
        """
        my_step = UserInputStep()
        my_step.set_content_url(self.user_supplied_id)
        my_step.set_content(self.user_supplied_id)
        self.completed_steps.append(my_step)

        cursor = 0
        while not self.completed_steps[-1].is_metadata:
            current_step = self.completed_steps[cursor]

            try:
                next_step = current_step.get_child()
                self.completed_steps.append(next_step)
                cursor = len(self.completed_steps) - 1
            except NoChildrenException:
                # NOTE(review): cursor can go negative and wrap around via
                # Python's negative indexing; if no step ever yields metadata
                # this loop may never terminate — confirm intended behavior.
                cursor -= 1

    @property
    def name(self):
        """Display name: the metadata title if present, else the input URL."""
        if self.metadata and self.metadata.get("title", ""):
            response = self.metadata.get("title", "")
            # citeproc titles may be MixedString instances; unwrap to plain text.
            if response.__class__.__name__ == "MixedString":
                return response.pop()
            else:
                return response
        return self.display_url

    @property
    def display_url(self):
        # The URL recorded by the initial user-input step.
        return self.completed_steps[0].content_url

    @property
    def metadata(self):
        """Metadata dict from the final step, augmented with year and URL.

        Note: mutates the final step's content dict in place on each access.
        """
        metadata_step = self.completed_steps[-1]
        if metadata_step.content.get("issued"):
            # Pull the year out of the CSL "issued" date-parts structure.
            try:
                year = metadata_step.content["issued"]["date-parts"][0][0]
            except IndexError:
                year = ""
            metadata_step.content["year"] = year

        metadata_dict = metadata_step.content

        # Use the URL of the most recent step that fetched real content.
        for step in reversed(self.completed_steps):
            if step.url and step.content:
                metadata_dict["URL"] = step.url
                break

        return metadata_dict

    def get_provenance(self):
        """Serialized trail of every resolution step, in order."""
        ret = [s.to_dict() for s in self.completed_steps]
        return ret

    @property
    def citation_plain(self):
        """The harvard1-style citation string, or None if unavailable."""
        citations = self.to_dict()["citations"]
        return next(
            (i["citation"] for i in citations if i["style_shortname"] == "harvard1"),
            None,
        )

    def to_dict(self):
        """Full API payload: url, name, citations, exports, metadata, provenance."""
        bibtex_metadata = get_bib_source_from_dict(self.metadata)

        ret = {
            "url": self.display_url,
            "name": self.name,
            "citations": citations(bibtex_metadata),
            "exports": reference_manager_exports(self.metadata),
            "metadata": self.metadata,
            "provenance": self.get_provenance(),
        }
        return ret
|
static/img/badges/babel.png
ADDED
|
static/img/badges/bff.png
ADDED
|
static/img/badges/big_hit.png
ADDED
|
static/img/badges/big_in_japan.png
ADDED
|
static/img/badges/buzz.psd
ADDED
|
|
Git LFS Details
|
static/img/badges/clean_sweep.png
ADDED
|
static/img/badges/controversial.png
ADDED
|
static/img/badges/deep_interest.png
ADDED
|
static/img/badges/depsy.png
ADDED
|
static/img/badges/famous_follower.png
ADDED
|
static/img/badges/first_steps.png
ADDED
|
static/img/badges/fun.psd
ADDED
|
|
Git LFS Details
|
static/img/badges/gender_balance.png
ADDED
|
static/img/badges/geo.psd
ADDED
|
|
Git LFS Details
|
static/img/badges/global_reach.png
ADDED
|
static/img/badges/global_reach_wrong_color.png
ADDED
|
static/img/badges/global_south.png
ADDED
|
static/img/badges/gold_star.png
ADDED
|
static/img/badges/hot_streak.png
ADDED
|
static/img/badges/hot_streak_wrong_color.png
ADDED
|
static/img/badges/impressions.png
ADDED
|
static/img/badges/influence.psd
ADDED
|
|
Git LFS Details
|
static/img/badges/ivory_tower.png
ADDED
|
static/img/badges/megafan.png
ADDED
|
static/img/badges/noun_16718.png
ADDED
|
static/img/badges/oa_advocate.png
ADDED
|
static/img/badges/oa_early_adopter.png
ADDED
|
static/img/badges/open_science_triathlete.png
ADDED
|
static/img/badges/open_sesame.png
ADDED
|
static/img/badges/openness.png
ADDED
|
static/img/badges/openness.psd
ADDED
|
|
Git LFS Details
|
static/img/badges/reading_level.png
ADDED
|
static/img/badges/rick_roll.png
ADDED
|
static/img/badges/rickroll.png
ADDED
|