Spaces:
Sleeping
Sleeping
Delete tools.py
Browse files
tools.py
DELETED
@@ -1,170 +0,0 @@
|
|
1 |
-
from langchain.tools import tool
|
2 |
-
from bs4 import BeautifulSoup
|
3 |
-
import requests
|
4 |
-
import datetime
|
5 |
-
import json
|
6 |
-
import time
|
7 |
-
import re
|
8 |
-
|
9 |
-
def clean_text(text):
    """Strip HTML tags from *text* and collapse runs of whitespace.

    Returns "" for falsy input (None, empty string).
    """
    if not text:
        return ""
    # Drop anything that looks like a markup tag, then normalize spacing.
    no_tags = re.sub(r"<[^>]+>", "", text)
    collapsed = re.sub(r"\s+", " ", no_tags)
    return collapsed.strip()
|
16 |
-
|
17 |
-
@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles matching *query*.

    Uses the NCBI E-utilities API: ``esearch`` to find up to 20 article
    IDs, then ``efetch`` per ID to pull the article XML and extract the
    title, abstract, authors, publication year, and journal.

    Returns:
        A JSON string holding a list of article dicts, or a
        single-element list with an ``"error"`` key on failure.
    """
    try:
        # Base URLs for the NCBI E-utilities endpoints.
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance",
        }

        # Get matching article IDs. The timeout keeps the tool from
        # hanging indefinitely on an unresponsive endpoint.
        response = requests.get(search_url, params=search_params, timeout=30)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fallback to XML parsing when the response is not valid JSON
            # (requests raises ValueError/JSONDecodeError for bad JSON).
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_tag.text for id_tag in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch the full article record as XML.
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml",
                }
                article_response = requests.get(fetch_url, params=fetch_params, timeout=30)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")

                # Collect "Given Surname" author names; entries without a
                # surname are skipped.
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                year_elem = article_soup.find(["pub-date", "year"])
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract,
                })

                # Courtesy delay between efetch calls to avoid NCBI
                # rate limiting.
                time.sleep(0.5)

            except Exception:
                # Best-effort: skip articles whose XML is malformed and
                # keep collecting the rest.
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        # Top-level boundary: report the failure to the LLM caller as
        # JSON instead of raising.
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
|
104 |
-
|
105 |
-
@tool
def google_scholar_search(query: str) -> str:
    """Search alternative medical-literature sources for *query*.

    Queries Europe PMC and Semantic Scholar REST APIs and merges the
    results. Entries missing a title or abstract are dropped.

    Returns:
        A JSON string holding a list of publication dicts (title,
        authors, year, journal, abstract, source), or a single-element
        list with an ``"error"`` key on total failure.
    """
    from urllib.parse import quote

    try:
        # Percent-encode the query: characters such as '&' or '#' would
        # otherwise corrupt the hand-built query strings below.
        encoded_query = quote(query)
        search_urls = [
            f"https://europepmc.org/webservices/rest/search?query={encoded_query}&format=json&pageSize=20",
            f"https://api.semanticscholar.org/graph/v1/paper/search?query={encoded_query}&limit=20&fields=title,abstract,year,authors,venue",
        ]

        results = []

        for url in search_urls:
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                # Timeout keeps one slow provider from stalling the tool.
                response = requests.get(url, headers=headers, timeout=30)

                if not response.ok:
                    continue

                data = response.json()

                # Europe PMC response shape.
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title") or "No title"),
                            "authors": [clean_text(author.get("fullName") or "Unknown") for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear") or "Unknown",
                            "journal": clean_text(result.get("journalTitle") or "Unknown Journal"),
                            "abstract": clean_text(result.get("abstractText") or "No abstract"),
                            "source": "Europe PMC",
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Semantic Scholar response shape. Note: this API returns
                # explicit nulls for missing fields, which .get() defaults
                # do not catch — hence the `or` fallbacks.
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title") or "No title"),
                            "authors": [clean_text(author.get("name") or "Unknown") for author in paper.get("authors", [])],
                            "year": paper.get("year") or "Unknown",
                            "journal": clean_text(paper.get("venue") or "Unknown Journal"),
                            "abstract": clean_text(paper.get("abstract") or "No abstract"),
                            "source": "Semantic Scholar",
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting between providers.

            except Exception:
                # Best-effort: a failing provider should not abort the
                # other one.
                continue

        return json.dumps(results, indent=2)

    except Exception as e:
        # Top-level boundary: report the failure as JSON instead of
        # raising into the agent loop.
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])
|
166 |
-
|
167 |
-
@tool
def today_tool() -> str:
    """Get today's date"""
    # date.isoformat() yields the same "YYYY-MM-DD" text as str(date).
    return datetime.date.today().isoformat()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|