# (Hugging Face upload metadata removed — was: "ferferefer's picture /
#  Upload 2 files / aaa7944 verified"; not valid Python source.)
from langchain.tools import tool
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
import re
def clean_text(text):
    """Strip HTML/XML tags from *text* and collapse whitespace.

    Falsy input (None, empty string) yields an empty string.
    """
    if not text:
        return ""
    # Drop anything that looks like a markup tag, then squash every run
    # of whitespace (newlines, tabs, multiple spaces) to a single space.
    without_tags = re.sub(r'<[^>]+>', '', text)
    collapsed = re.sub(r'\s+', ' ', without_tags)
    return collapsed.strip()
@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles.

    Uses the NCBI E-utilities: ``esearch`` to find up to 20 matching
    article IDs, then ``efetch`` to pull each article's XML record.

    Args:
        query: Free-text search string passed to the PMC search index.

    Returns:
        A JSON string: a list of dicts with keys ``id``, ``title``,
        ``authors``, ``year``, ``journal`` and ``abstract``; on failure,
        a single-element list containing an ``error`` key.
    """
    try:
        # Base URLs for the NCBI E-utilities endpoints.
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance",
        }
        # Step 1: get matching article IDs.  A timeout keeps the tool
        # from hanging indefinitely on a stalled connection.
        response = requests.get(search_url, params=search_params, timeout=30)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])
        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # NCBI occasionally answers with XML even when JSON was
            # requested — fall back to XML parsing in that case.
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_tag.text for id_tag in soup.find_all('Id')]
        articles = []
        # Step 2: fetch each article's full XML record individually.
        for pmid in article_ids:
            try:
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml",
                }
                article_response = requests.get(fetch_url, params=fetch_params, timeout=30)
                if not article_response.ok:
                    continue
                article_soup = BeautifulSoup(article_response.text, 'xml')
                # Title and abstract — fall back to placeholders when the
                # record lacks the element.
                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")
                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")
                # Authors: PMC records use <contrib>/<surname>/<given-names>;
                # the alternate tag names cover older record formats.
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))
                year_elem = article_soup.find(["pub-date", "year"])
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"
                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")
                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract,
                })
                # Be polite to NCBI: short delay to avoid rate limiting.
                time.sleep(0.5)
            except Exception:
                # Best-effort: skip any article whose record cannot be
                # fetched or parsed, and keep the rest.
                continue
        return json.dumps(articles, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature.

    Queries Europe PMC and Semantic Scholar (despite the historical
    name, Google Scholar itself is not used) and merges the results.

    Args:
        query: Free-text search string.

    Returns:
        A JSON string: a list of dicts with keys ``title``, ``authors``,
        ``year``, ``journal``, ``abstract`` and ``source``; on total
        failure, a single-element list containing an ``error`` key.
    """
    try:
        # (endpoint, query parameters) pairs.  Passing params= lets
        # requests percent-encode the query — interpolating it raw into
        # the URL broke searches containing spaces, '&' or '#'.
        sources = [
            ("https://europepmc.org/webservices/rest/search",
             {"query": query, "format": "json", "pageSize": 20}),
            ("https://api.semanticscholar.org/graph/v1/paper/search",
             {"query": query, "limit": 20,
              "fields": "title,abstract,year,authors,venue"}),
        ]
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        results = []
        for url, params in sources:
            try:
                # Timeout prevents a stalled source from hanging the tool.
                response = requests.get(url, params=params, headers=headers, timeout=30)
                if not response.ok:
                    continue
                data = response.json()
                # Europe PMC wraps hits in resultList.result.
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title") or "No title"),
                            "authors": [clean_text(author.get("fullName", "Unknown")) for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear") or "Unknown",
                            "journal": clean_text(result.get("journalTitle") or "Unknown Journal"),
                            "abstract": clean_text(result.get("abstractText") or "No abstract"),
                            "source": "Europe PMC",
                        }
                        # Only keep entries with both a real title and abstract.
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)
                # Semantic Scholar puts hits in data; fields such as
                # "abstract" may be present but explicitly null, so use
                # `or` fallbacks rather than dict.get defaults.
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title") or "No title"),
                            "authors": [clean_text(author.get("name", "Unknown")) for author in paper.get("authors", [])],
                            "year": paper.get("year") or "Unknown",
                            "journal": clean_text(paper.get("venue") or "Unknown Journal"),
                            "abstract": clean_text(paper.get("abstract") or "No abstract"),
                            "source": "Semantic Scholar",
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)
                time.sleep(1)  # Rate limiting between sources.
            except Exception:
                # Best-effort: if one source fails, still return the other.
                continue
        return json.dumps(results, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])
@tool
def today_tool() -> str:
    """Get today's date"""
    # date.isoformat() produces YYYY-MM-DD, exactly what str() of a
    # date object yields.
    return datetime.date.today().isoformat()