Spaces:
Runtime error
Runtime error
| import re | |
| from flask import abort | |
| import requests | |
| import validators | |
| from steps.arxiv import ArxivResponseStep | |
| from steps.bitbucket import BitbucketRepoStep | |
| from steps.cran import CranLibraryStep | |
| from steps.core import Step | |
| from steps.crossref import CrossrefResponseStep | |
| from steps.github import GithubRepoStep | |
| from steps.google import GoogleStep | |
| from steps.pmid import PMIDStep | |
| from steps.pypi import PypiLibraryStep | |
| from steps.webpage import WebpageStep | |
class UserInputStep(Step):
    """Step that turns raw user input into the URL the child steps start from.

    The input may be a DOI, a URL, free text containing a URL, an arXiv
    identifier, a bare host name, or a search phrase; see
    ``build_starting_url`` for the order in which these are recognised.
    """

    def starting_children(self):
        """Return the step classes to try after this one, in this order."""
        return [
            GoogleStep,
            CrossrefResponseStep,
            PMIDStep,
            ArxivResponseStep,
            GithubRepoStep,
            BitbucketRepoStep,
            CranLibraryStep,
            PypiLibraryStep,
            WebpageStep,
        ]

    def set_content_url(self, input):
        """Resolve ``input`` to a URL and store it in ``self.content_url``.

        Calls flask's ``abort(404)`` when the resolved URL uses the ``ftp://``
        scheme. URLs containing ``readthedocs`` are replaced by the project's
        citation page when one can be found.
        """
        url = self.build_starting_url(input)
        if url.startswith("ftp://"):
            abort(404)
        if "readthedocs" in url:
            url = self.get_citation_html_file(url)
        self.content_url = url

    def set_content(self, input):
        """Derive ``self.content`` from the already-set ``self.content_url``.

        ``input`` is accepted for interface compatibility but is not read.
        """
        if self.content_url.startswith("http://arxiv"):
            # "http://arxiv.org/abs/<id>" becomes "arxiv:<id>".
            self.content = self.content_url.replace("http://", "").replace(
                ".org/abs/", ":"
            )
        else:
            self.content = self.content_url

    def build_starting_url(self, input):
        """Return the starting URL (or search phrase) for ``input``.

        Checks are applied in order and the first match wins:

        1. DOI (starts with ``10.``)            -> ``http://doi.org/<doi>``
        2. Input that already is an http(s) URL -> returned unchanged
        3. Text containing an http(s) URL       -> the first such URL
        4. ``arxiv:<id>`` (any letter case)     -> ``http://arxiv.org/abs/<id>``
        5. Bare arXiv id such as ``1812.02329`` -> ``http://arxiv.org/abs/<id>``
        6. Host/path reachable over http        -> ``http://<input>``
        7. Anything else is treated as a search phrase: it is stored in
           ``self.key_word`` and returned unchanged.
        """
        # doi
        if input.startswith("10."):
            return "http://doi.org/{}".format(input)

        # web page
        if input.startswith(("http://", "https://")):
            return input

        # url in string
        embedded_url = re.search(r"(?P<url>https?://[^\s]+)", input)
        if embedded_url:
            return embedded_url.group("url")

        # arxiv, written as "arxiv:<id>"
        if input.lower().startswith("arxiv"):
            _, colon, arxiv_id = input.partition(":")
            arxiv_id = arxiv_id.strip()
            # Without a colon (e.g. "arxiv.org/abs/1812.02329") there is no id
            # to extract, so fall through to the remaining checks instead of
            # failing.
            if colon and arxiv_id:
                return "http://arxiv.org/abs/{}".format(arxiv_id)

        # arxiv ID only, like 1812.02329
        if self.is_arxiv_id(input):
            return "http://arxiv.org/abs/{}".format(input)

        # add http to see if it is a valid URL (performs a network request,
        # so it is deliberately the last check before giving up)
        if self.is_valid_url(input):
            return "http://{}".format(input)

        # google search
        self.key_word = input
        return input

    @staticmethod
    def is_arxiv_id(input):
        """Return True when ``input`` starts with ``NNNN.NNNNN`` (a bare arXiv id)."""
        # The dot is escaped so that a plain 10-digit number is not mistaken
        # for an arXiv id.
        return bool(re.match(r"\d{4}\.\d{5}", input))

    @staticmethod
    def is_valid_url(input):
        """Return True when ``http://<input>`` is well-formed and answers with 200.

        Any failure to validate or fetch the URL yields False.
        """
        url = "http://{}".format(input)
        if not validators.url(url):
            return False
        try:
            response = requests.get(url, timeout=1)
        except (requests.exceptions.RequestException, ValueError):
            # Best effort: an unreachable or unparsable address is simply not
            # a valid URL for our purposes.
            return False
        return response.status_code == requests.codes.ok

    @staticmethod
    def get_citation_html_file(url):
        """Return the citation page of a Read the Docs project, if it exists.

        Tries ``citation.html`` and then ``reference/citing.html`` under the
        documentation root (``en/stable/`` unless ``url`` already ends with
        ``en/stable`` or ``en/latest``). Returns the first candidate that
        answers with HTTP 200; returns ``url`` unchanged when none does or
        when a request fails.
        """
        # citation paths
        citation_paths = ("citation.html", "reference/citing.html")

        # format url: normalise to exactly one trailing slash, then make sure
        # the base points at a documentation root
        base = url if url.endswith("/") else url + "/"
        if not base.endswith(("en/stable/", "en/latest/")):
            base += "en/stable/"
        citation_urls = [base + path for path in citation_paths]

        # check if citation exists
        try:
            for citation_url in citation_urls:
                response = requests.get(citation_url, timeout=2)
                if response.status_code == 200:
                    return citation_url
        except requests.exceptions.RequestException:
            return url
        return url