Spaces:

raannakasturi
/

cite-as

Runtime error

File size: 13,130 Bytes

dc5b905

#!/usr/bin/python
# -*- coding: utf-8 -*-

from citeproc.py2compat import *

# copied from https://github.com/brechtm/citeproc-py/blob/master/citeproc/source/bibtex/bibtex.py
# then modified to fix bugs.  search for "hap" to see mods

import re
import unicodedata

from warnings import warn

from citeproc.types import (
    ARTICLE,
    ARTICLE_JOURNAL,
    BOOK,
    CHAPTER,
    MANUSCRIPT,
    PAMPHLET,
    PAPER_CONFERENCE,
    REPORT,
    THESIS,
)
from citeproc.string import String, MixedString, NoCase
from citeproc.source import BibliographySource, Reference, Name, Date, DateRange
from citeproc.source.bibtex.bibparse import BibTeXParser
from citeproc.source.bibtex.latex import parse_latex
from citeproc.source.bibtex.latex.macro import NewCommand, Macro


class BibTeX(BibliographySource):
    """Bibliography source that loads its references from a BibTeX database.

    Every entry produced by ``BibTeXParser`` is converted to a ``Reference``
    (see ``create_reference``) and registered with ``self.add``.
    """

    # Maps BibTeX field names to the CSL variable names stored on the
    # reference. Fields that are not listed are passed through under their
    # BibTeX name (see ``_bibtex_to_csl``).
    fields = {
        "address": "publisher_place",
        "annote": "annote",
        "author": "author",
        "booktitle": "container_title",
        "chapter": "chapter_number",
        "edition": "edition",
        "editor": "editor",
        #              'howpublished': None,
        #              'institution': None,
        "journal": "container_title",
        #              'month': None,
        "note": "note",
        "number": "issue",
        #              'organization': None,
        "pages": "page",
        "publisher": "publisher",
        #              'school': None,
        "series": "collection_title",
        "title": "title",
        #              'type': None,
        #              'year': None,
        "volume": "volume",
        # hap added doi and url
        "doi": "doi",
        "url": "url",
        # non-standard fields
        "isbn": "ISBN",
        "issn": "ISSN",
    }

    # Maps BibTeX entry types to CSL reference types. An entry whose type is
    # missing from this table makes ``create_reference`` raise ``KeyError``.
    types = {  # standard entry types
        "article": ARTICLE_JOURNAL,
        "book": BOOK,
        "booklet": PAMPHLET,
        "conference": PAPER_CONFERENCE,
        "inbook": CHAPTER,
        "incollection": ARTICLE_JOURNAL,
        "inproceedings": PAPER_CONFERENCE,
        "manual": BOOK,
        "mastersthesis": THESIS,
        "misc": ARTICLE,
        "phdthesis": THESIS,
        "proceedings": BOOK,
        "techreport": REPORT,
        "unpublished": MANUSCRIPT,
        # non-standard entry types
        "thesis": THESIS,
        "report": REPORT,
    }

    def __init__(self, filename, encoding="ascii"):
        """Parse the BibTeX database *filename* and add all of its entries.

        *encoding* is assigned to the parser object after it has been
        constructed. Macros defined with ``\\newcommand`` in the database
        preamble are collected in ``self.preamble_macros`` and reused when
        field values are converted from LaTeX.
        """
        bibtex_database = BibTeXParser(filename)
        bibtex_database.encoding = encoding
        self.preamble_macros = {}
        parse_latex(
            bibtex_database.preamble,
            {
                "newcommand": NewCommand(self.preamble_macros),
                "mbox": Macro(1, "{0}"),
                "cite": Macro(1, "CITE({0})"),
            },
        )
        for key, entry in bibtex_database.items():
            self.add(self.create_reference(key, entry))

    def _bibtex_to_csl(self, bibtex_entry):
        """Convert the fields of *bibtex_entry* into a dict of CSL variables.

        Field names are translated through ``self.fields``; unknown fields
        keep their BibTeX name. Values are converted depending on the field:

        - ``number``/``volume``: turned into an ``int`` when possible;
        - ``pages``: normalized by ``_bibtex_to_csl_pages``;
        - ``author``/``editor``: parsed into a list of names;
        - anything else: parsed by ``_parse_string``.
        """
        csl_dict = {}
        for field, value in bibtex_entry.items():
            try:
                value = value.strip()
            except AttributeError:
                # not a string; use the value as it is
                pass

            csl_field = self.fields.get(field, field)

            if field in ("number", "volume"):
                try:
                    value = int(value)
                except ValueError:
                    # e.g. a non-numeric issue; keep the original text
                    pass
            elif field == "pages":
                value = self._bibtex_to_csl_pages(value)
            elif field in ("author", "editor"):
                try:
                    value = list(self._parse_author(value))
                except RuntimeError:
                    # best effort: keep the unparsed value when the names
                    # cannot be parsed
                    pass
            else:
                try:
                    value = self._parse_string(value)
                except TypeError:
                    value = str(value)

            csl_dict[csl_field] = value
        return csl_dict

    @staticmethod
    def _bibtex_to_csl_pages(value):
        """Normalize a BibTeX ``pages`` value.

        Spaces are removed. A range written with ``--`` or ``-`` is returned
        as ``first-last``. A value without a hyphen is returned as is, minus
        a trailing ``+`` ("and following pages"). A value containing hyphens
        that cannot be split into exactly two parts (for example ``1-2-3``)
        is returned unchanged instead of raising ``ValueError``.
        """
        value = value.replace(" ", "")
        if "-" not in value:
            return value[:-1] if value.endswith("+") else value
        # Prefer the LaTeX en-dash notation, then fall back to one hyphen.
        for separator in ("--", "-"):
            bounds = value.split(separator)
            if len(bounds) == 2:
                return "-".join(bounds)
        return value

    def _bibtex_to_csl_date(self, bibtex_entry):
        """Build the issue date of *bibtex_entry* from its ``year`` field.

        Returns ``None`` when the entry has no year, a ``Date`` for a single
        year and a ``DateRange`` when ``_parse_year`` yields two different
        years.
        """
        # hap disabled the month handling on Feb 18, 2017 because it was
        # causing bugs; the ``month`` field is ignored and ``_parse_month``
        # is not called here.
        begin_dict, end_dict = {}, {}

        if "year" in bibtex_entry:
            begin_dict["year"], end_dict["year"] = self._parse_year(
                bibtex_entry["year"]
            )
        if not begin_dict:
            return None
        if begin_dict == end_dict:
            return Date(**begin_dict)
        else:
            return DateRange(begin=Date(**begin_dict), end=Date(**end_dict))

    def _parse_year(self, year):
        """Return the ``(begin, end)`` years described by *year*.

        A single year is returned twice as an ``int`` (``ValueError`` is
        raised if it is not numeric). For an en-dash separated range the two
        parts are returned as strings; an end year with fewer characters than
        the begin year is completed with the leading characters of the begin
        year (``1998–99`` gives ``("1998", "1999")``).
        """
        # NOTE(review): the range branch returns strings while the single
        # year branch returns ints -- confirm that ``Date`` accepts both.
        try:
            year_str = parse_latex(year, self.preamble_macros)
        except TypeError:
            year_str = str(year)
        if EN_DASH in year_str:
            begin_year, end_year = year_str.split(EN_DASH)
            begin_len, end_len = len(begin_year), len(end_year)
            if end_len < begin_len:
                end_year = begin_year[: begin_len - end_len] + end_year
        else:
            begin_year = end_year = int(year_str)
        return begin_year, end_year

    # Three-letter month abbreviations; the position (plus one) is the month
    # number.
    MONTHS = (
        "jan",
        "feb",
        "mar",
        "apr",
        "may",
        "jun",
        "jul",
        "aug",
        "sep",
        "oct",
        "nov",
        "dec",
    )
    # Raw strings: ``\d`` and ``\w`` are regex classes, not string escapes.
    RE_DAY = r"(?P<day>\d+)"
    RE_MONTH = r"(?P<month>\w+)"

    @staticmethod
    def _parse_month(month):
        """Parse a BibTeX ``month`` value into ``(begin, end)`` dicts.

        Both dicts get a numeric ``month`` key and, when the value also
        contains a day (``"12 jan"`` or ``"jan 12"``), a ``day`` key. A
        purely alphabetic value containing a hyphen (``"jan-feb"``) is
        treated as a range of months.

        Raises ``ValueError`` when the value cannot be interpreted.
        """

        def month_name_to_index(name):
            # Month names are matched on their first three letters; anything
            # else has to be a month number.
            try:
                return BibTeX.MONTHS.index(name[:3].lower()) + 1
            except ValueError:
                return int(name)

        begin = {}
        end = {}
        month = month.strip()
        month = month.replace(", ", "-")
        if month.isdecimal():
            begin["month"] = end["month"] = month
        elif month.replace("-", "").isalpha():
            if "-" in month:
                begin["month"], end["month"] = month.split("-")
            else:
                begin["month"] = end["month"] = month
        else:
            m = re.match(BibTeX.RE_DAY + "[ ~]*" + BibTeX.RE_MONTH, month)
            if m is None:
                m = re.match(BibTeX.RE_MONTH + "[ ~]*" + BibTeX.RE_DAY, month)
            if m is None:
                # previously failed with AttributeError on ``None.group``
                raise ValueError('Cannot parse month "{}"'.format(month))
            begin["day"] = end["day"] = int(m.group("day"))
            begin["month"] = end["month"] = m.group("month")
        begin["month"] = month_name_to_index(begin["month"])
        end["month"] = month_name_to_index(end["month"])
        return begin, end

    def _parse_string(self, title):
        """Convert a BibTeX field value into a ``MixedString``.

        The value is converted to ``str``, newlines are removed and runs of
        whitespace are collapsed to single spaces. Text inside a top-level
        brace group becomes a ``NoCase`` string unless the group starts with
        a backslash; all other text becomes a ``String``. Each piece is run
        through ``parse_latex`` first.

        Raises ``SyntaxError`` when the braces do not match.
        """

        def make_string(string, top_level_group=False):
            unlatexed = parse_latex(string, self.preamble_macros)
            fixed_case = top_level_group and not string.startswith("\\")
            string_cls = NoCase if fixed_case else String
            return string_cls(unlatexed)

        title = str(title)
        # hap: newlines are dropped (not replaced by a space) before the
        # remaining whitespace is normalized
        title = title.replace("\n", "")
        title = " ".join(title.split())

        output = MixedString()
        level = 0
        string = ""
        for char in title:
            if char == "{":
                if level == 0:
                    # flush the text collected before this top-level group
                    if string:
                        output += make_string(string)
                        string = ""
                level += 1
            elif char == "}":
                level -= 1
                if level == 0:
                    # a top-level group has just been closed
                    output += make_string(string, True)
                    string = ""
            else:
                string += char
        if level != 0:
            raise SyntaxError('Non-matching braces in "{}"'.format(title))
        if string:
            output += make_string(string)
        return output

    def _parse_author(self, authors):
        """Parse a BibTeX name list into a list of ``Name`` objects.

        The list is split with ``split_names`` and every name is broken up
        by ``parse_name``; the parts that are present are converted from
        LaTeX and passed to ``Name`` under their CSL labels.
        """
        csl_authors = []
        for author in split_names(authors):
            first, von, last, jr = parse_name(author)
            csl_parts = {}
            for part, csl_label in [
                (first, "given"),
                (von, "non-dropping-particle"),
                (last, "family"),
                (jr, "suffix"),
            ]:
                if part is not None:
                    csl_parts[csl_label] = parse_latex(part, self.preamble_macros)
            name = Name(**csl_parts)
            csl_authors.append(name)
        return csl_authors

    def create_reference(self, key, bibtex_entry):
        """Create the ``Reference`` for *bibtex_entry*, identified by *key*.

        Raises ``KeyError`` when the entry's ``document_type`` is not listed
        in ``self.types``.
        """
        csl_type = self.types[bibtex_entry.document_type]
        csl_fields = self._bibtex_to_csl(bibtex_entry)
        csl_date = self._bibtex_to_csl_date(bibtex_entry)
        if csl_date:
            csl_fields["issued"] = csl_date
        ref = Reference(key, csl_type, **csl_fields)
        return ref


# BibTeX name handling
#
# references
#  - BibTeXing by Oren Patashnik (Feb 8, 1988), 4. Helpful Hints, item 18
#    (BibTeX 0.99d - http://www.ctan.org/tex-archive/biblio/bibtex/base/btxdoc.pdf)
#  - A summary of BibTex by Xavier Décoret
#    (http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html)
#  - Tame the BeaST by Nicolas Markey
#    (http://tug.ctan.org/info/bibtex/tamethebeast/ttb_en.pdf)

AND = " and "

# Name separator: the word "and" surrounded by any amount of whitespace, so
# that name lists wrapped over several lines are split as well.
_AND_SEPARATOR = re.compile(r"\s+" + re.escape(AND.strip()) + r"\s+")


def split_names(string):
    """Split a string of names separated by 'and' into a list of names.

    The separator is only recognized outside of braces (brace level 0), so
    ``{Barnes and Noble}`` stays a single name. Any whitespace (spaces, tabs,
    newlines) may surround the ``and``. An empty trailing name is dropped.
    """
    brace_level = 0
    names = []
    last_index = 0
    index = 0
    length = len(string)
    while index < length:
        char = string[index]
        if char == "{":
            brace_level += 1
        elif char == "}":
            brace_level -= 1
        elif brace_level == 0 and char.isspace():
            # Match in place instead of slicing the rest of the string.
            separator = _AND_SEPARATOR.match(string, index)
            if separator is not None:
                names.append(string[last_index:index])
                # continue scanning right after the separator
                last_index = index = separator.end()
                continue
        index += 1
    last_name = string[last_index:]
    if last_name:
        names.append(last_name)
    return names


def parse_name(name):
    """Parse a BibTeX name string and split it into First, von, Last and Jr
    parts.

    Three notations are recognized, depending on the number of commas:
    ``First von Last``, ``von Last, First`` and ``von Last, Jr, First``.

    Returns a ``(first, von, last, jr)`` tuple of strings; ``first``, ``von``
    and ``jr`` are ``None`` when the corresponding part is absent.

    Raises ``ValueError`` when *name* contains no parts at all (previously
    this failed with an ``UnboundLocalError``).
    """
    parts = split_name(name)
    if not parts:
        raise ValueError("Cannot parse empty name {!r}".format(name))
    jr = []
    if len(parts) == 1:  # First von Last
        (first_von_last,) = parts
        first = []
        # First consists of the leading capitalized (or case-less) words,
        # but never includes the final word, which belongs to Last.
        for word in first_von_last[:-1]:
            if is_capitalized(word) is False:
                break
            first.append(word)
        von_last = first_von_last[len(first):]
    elif len(parts) == 2:  # von Last, First
        von_last, first = parts
    else:  # von Last, Jr, First
        von_last, jr = parts[0], parts[1]
        # With more than two commas the surplus parts are folded into First
        # rather than failing; NOTE(review): believed to mirror BibTeX's
        # "too many commas" handling -- confirm.
        first = [word for part in parts[2:] for word in part]
    von, last = split_von_last(von_last)
    join = " ".join
    return join(first) or None, join(von) or None, join(last), join(jr) or None


def split_name(name):
    """Split a name into parts delimited by commas (at brace-level 0), and
    each part into words.

    Words are separated by whitespace at brace-level 0; whitespace and commas
    inside braces are kept as part of the word.

    Returns a list of lists of words. A trailing part is included whenever it
    contains at least one word, also when the name ends in whitespace.
    """
    brace_level = 0
    parts = []
    current_part = []
    word = ""
    for char in name:
        if char == "," or char.isspace():
            if brace_level == 0:
                if word:
                    current_part.append(word)
                    word = ""
                if char == ",":
                    parts.append(current_part)
                    current_part = []
                continue
        elif char == "{":
            brace_level += 1
        elif char == "}":
            brace_level -= 1
        word += char
    if word:
        current_part.append(word)
    # Flush the last part even if its final word was already closed by
    # trailing whitespace (it used to be dropped in that case).
    if current_part:
        parts.append(current_part)
    return parts


def is_capitalized(string):
    """Check if a BibTeX substring is capitalized.

    The case is determined by the first letter that is either at brace
    level 0 or inside a "special character": a top-level brace group that
    starts with a backslash, such as ``{\\'E}``.

    A string can be "case-less", in which case `None` is returned. This
    includes the empty string.
    """
    brace_level = 0
    special_char = False
    # Pair every character with its successor (`None` after the last one).
    # Unlike a generator-based lookahead, this also copes with empty input.
    following = list(string[1:]) + [None]
    for char, next_char in zip(string, following):
        if (brace_level == 0 or special_char) and char.isalpha():
            return char.isupper()
        elif char == "{":
            brace_level += 1
            if brace_level == 1 and next_char == "\\":
                special_char = True
        elif char == "}":
            brace_level -= 1
            if brace_level == 0:
                special_char = False
    return None  # case-less


def split_von_last(words):
    """Separate the words of a "von Last" name into (von, Last).

    A von part only exists when there is more than one word and the first
    word is lower case. It then runs up to and including the last lower-case
    word that precedes the final word; everything after it is the Last part.
    Otherwise the von part is empty and all words belong to Last.
    """
    if len(words) <= 1 or is_capitalized(words[0]) is not False:
        return [], words
    # Walk backwards over every candidate split position; the final word is
    # never part of von.
    for split_at in range(len(words) - 1, 0, -1):
        if is_capitalized(words[split_at - 1]) is False:
            return words[:split_at], words[split_at:]
    return [], words


def lookahead_iter(iterable):
    """Iterator that also yields the next item along with each item. The next
    item is `None` when yielding the last item.

    Nothing is yielded for an empty iterable.
    """
    items = iter(iterable)
    try:
        item = next(items)
    except StopIteration:
        # Empty input. Letting StopIteration escape from a generator is
        # turned into a RuntimeError (PEP 479), so end cleanly instead.
        return
    for next_item in items:
        yield item, next_item
        item = next_item
    yield item, None


# The en dash character (U+2013), used to separate the two years of a range.
EN_DASH = "\N{EN DASH}"