from langchain.tools import tool
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
import re
import urllib.parse

# Network timeout (seconds) for all outbound HTTP calls; without one a
# stalled server would hang the tool indefinitely.
REQUEST_TIMEOUT = 30


def clean_text(text):
    """Clean text from HTML tags and extra whitespace.

    Args:
        text: Raw string that may contain HTML markup; may be None/empty.

    Returns:
        The text with anything that looks like an HTML/XML tag removed and
        all whitespace runs collapsed to single spaces. Empty string for
        falsy input.
    """
    if not text:
        return ""
    text = re.sub(r'<[^>]+>', '', text)   # drop <...> markup tags
    text = re.sub(r'\s+', ' ', text)      # collapse whitespace runs
    return text.strip()


@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    # Returns a JSON array of article dicts (id/title/authors/year/journal/
    # abstract), or a one-element array with an "error" key on failure.
    try:
        # Base URLs for the NCBI E-utilities (esearch finds IDs, efetch
        # retrieves full records).
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get the list of matching article IDs.
        response = requests.get(search_url, params=search_params,
                                timeout=REQUEST_TIMEOUT)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # NCBI occasionally answers XML even when JSON was requested;
            # fall back to pulling <Id> elements out of the XML.
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_node.text for id_node in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch the full XML record for this article.
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params,
                                                timeout=REQUEST_TIMEOUT)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                # Title and abstract, with placeholders when absent.
                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem
                                      else "No abstract")

                # Author names: "<given> <surname>" when both are present.
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                year_elem = article_soup.find(["pub-date", "year"])
                year = (year_elem.find("year").text
                        if year_elem and year_elem.find("year") else "Unknown")

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem
                                     else "Unknown Journal")

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # Add delay to avoid NCBI rate limiting.
                time.sleep(0.5)

            except Exception:
                # Best-effort: skip any article whose record fails to
                # download or parse rather than aborting the whole search.
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])


@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature"""
    # Queries Europe PMC and Semantic Scholar and merges the hits into one
    # JSON array; entries lacking a title or abstract are dropped.
    try:
        # Percent-encode the query: raw interpolation breaks the URL for
        # queries containing spaces or reserved characters.
        encoded_query = urllib.parse.quote(query)
        search_urls = [
            f"https://europepmc.org/webservices/rest/search?query={encoded_query}&format=json&pageSize=20",
            f"https://api.semanticscholar.org/graph/v1/paper/search?query={encoded_query}&limit=20&fields=title,abstract,year,authors,venue"
        ]

        results = []
        for url in search_urls:
            try:
                # Browser-like UA: some endpoints reject default clients.
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                response = requests.get(url, headers=headers,
                                        timeout=REQUEST_TIMEOUT)
                if not response.ok:
                    continue

                data = response.json()

                # Handle Europe PMC response shape.
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title", "No title")),
                            "authors": [clean_text(author.get("fullName", "Unknown"))
                                        for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear", "Unknown"),
                            "journal": clean_text(result.get("journalTitle", "Unknown Journal")),
                            "abstract": clean_text(result.get("abstractText", "No abstract")),
                            "source": "Europe PMC"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Handle Semantic Scholar response shape.
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title", "No title")),
                            "authors": [clean_text(author.get("name", "Unknown"))
                                        for author in paper.get("authors", [])],
                            "year": paper.get("year", "Unknown"),
                            "journal": clean_text(paper.get("venue", "Unknown Journal")),
                            "abstract": clean_text(paper.get("abstract", "No abstract")),
                            "source": "Semantic Scholar"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting between providers

            except Exception:
                # Best-effort aggregation: a failing provider is skipped so
                # the other source can still contribute results.
                continue

        return json.dumps(results, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])


@tool
def today_tool() -> str:
    """Get today's date"""
    # ISO-format date string, e.g. "2024-05-01".
    return str(datetime.date.today())