ferferefer committed on
Commit
8b9e744
·
verified ·
1 Parent(s): 455411e

Delete tools.py

Browse files
Files changed (1) hide show
  1. tools.py +0 -170
tools.py DELETED
@@ -1,170 +0,0 @@
1
- from langchain.tools import tool
2
- from bs4 import BeautifulSoup
3
- import requests
4
- import datetime
5
- import json
6
- import time
7
- import re
8
-
9
def clean_text(text):
    """Strip HTML tags from *text* and collapse whitespace runs.

    Falsy input (``None``, ``""``) yields an empty string, so callers can
    pass optional fields straight through without guarding.
    """
    if not text:
        return ""
    without_tags = re.sub(r'<[^>]+>', '', text)
    collapsed = re.sub(r'\s+', ' ', without_tags)
    return collapsed.strip()
16
-
17
@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles matching *query*.

    Uses the NCBI E-utilities: ``esearch`` to find up to 20 relevant
    article IDs, then ``efetch`` per ID for the full XML record.

    Returns:
        A JSON-encoded list of dicts with keys ``id``, ``title``,
        ``authors``, ``year``, ``journal`` and ``abstract``.  On failure a
        JSON list with a single ``{"error": ...}`` dict is returned, so the
        caller always receives valid JSON and never sees an exception.
    """
    try:
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance",
        }

        # Fix: add a timeout so a stalled NCBI endpoint cannot hang the tool.
        response = requests.get(search_url, params=search_params, timeout=30)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fix: narrowed from a bare `except:` — only a JSON decode
            # failure should trigger the XML fallback.  (requests raises
            # ValueError / json.JSONDecodeError here.)
            soup = BeautifulSoup(response.text, 'xml')
            # Fix: renamed loop variable; the original shadowed builtin `id`.
            article_ids = [id_tag.text for id_tag in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml",
                }
                article_response = requests.get(fetch_url, params=fetch_params, timeout=30)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")

                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                year_elem = article_soup.find(["pub-date", "year"])
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract,
                })

                # Stay under NCBI's unauthenticated rate limit (~3 req/s).
                time.sleep(0.5)

            except Exception:
                # Deliberate best-effort: a single malformed or unreachable
                # article is skipped rather than aborting the whole search.
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        # Top-level boundary: this tool must always return JSON, never raise.
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
104
-
105
@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources (Europe PMC, Semantic Scholar) for medical literature.

    Queries each source in turn and merges the hits that have both a real
    title and a real abstract.

    Returns:
        A JSON-encoded list of dicts with keys ``title``, ``authors``,
        ``year``, ``journal``, ``abstract`` and ``source``.  On total
        failure a JSON list with a single ``{"error": ...}`` dict is
        returned, so the caller always receives valid JSON.
    """
    try:
        # Fix: pass the query through `params=` so requests URL-encodes it.
        # The original interpolated raw text into the URL f-string, which
        # produced broken requests for queries containing spaces, '&' or '#'.
        sources = [
            ("https://europepmc.org/webservices/rest/search",
             {"query": query, "format": "json", "pageSize": 20}),
            ("https://api.semanticscholar.org/graph/v1/paper/search",
             {"query": query, "limit": 20,
              "fields": "title,abstract,year,authors,venue"}),
        ]

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        results = []

        for base_url, params in sources:
            try:
                # Fix: add a timeout so a stalled API cannot hang the tool.
                response = requests.get(base_url, params=params, headers=headers, timeout=30)

                if not response.ok:
                    continue

                data = response.json()

                # Europe PMC wraps hits in a "resultList" envelope.
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title", "No title")),
                            "authors": [clean_text(author.get("fullName", "Unknown")) for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear", "Unknown"),
                            "journal": clean_text(result.get("journalTitle", "Unknown Journal")),
                            "abstract": clean_text(result.get("abstractText", "No abstract")),
                            "source": "Europe PMC",
                        }
                        # Keep only hits with a usable title and abstract.
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Semantic Scholar returns hits under a "data" key.
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title", "No title")),
                            "authors": [clean_text(author.get("name", "Unknown")) for author in paper.get("authors", [])],
                            "year": paper.get("year", "Unknown"),
                            "journal": clean_text(paper.get("venue", "Unknown Journal")),
                            "abstract": clean_text(paper.get("abstract", "No abstract")),
                            "source": "Semantic Scholar",
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting between sources

            except Exception:
                # Deliberate best-effort: a failing source is skipped so the
                # other source can still contribute results.
                continue

        return json.dumps(results, indent=2)

    except Exception as e:
        # Top-level boundary: this tool must always return JSON, never raise.
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])
166
-
167
@tool
def today_tool() -> str:
    """Return today's date as an ISO-formatted string (YYYY-MM-DD)."""
    # date.isoformat() is exactly what str(date) produces.
    return datetime.date.today().isoformat()