import datetime
import json
import re
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from langchain.tools import tool  # used in the usage sketch at the end of this file

def clean_text(text):
    """Clean text of HTML tags and extra whitespace"""
    if not text:
        return ""
    text = re.sub(r'<[^>]+>', '', text)  # strip HTML/XML tags
    text = re.sub(r'\s+', ' ', text)     # collapse whitespace runs
    return text.strip()
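
# Example behaviour of clean_text (illustrative input, not from the original):
#
#   clean_text("<p>Deep   brain\n stimulation</p>")  ->  "Deep brain stimulation"
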
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    try:
        # Base URLs for the NCBI E-utilities APIs
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        # Search parameters
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get matching article IDs
        response = requests.get(search_url, params=search_params, timeout=30)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fall back to XML parsing if the JSON payload is malformed
            soup = BeautifulSoup(response.text, "xml")
            article_ids = [id_tag.text for id_tag in soup.find_all("Id")]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch the full article record as XML
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params, timeout=30)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, "xml")

                # Extract title and abstract
                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")

                # Collect author names ("Given Surname" where both parts exist)
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                # Prefer the <year> inside <pub-date>; fall back to any bare <year>
                pub_date = article_soup.find("pub-date")
                year_tag = pub_date.find("year") if pub_date else article_soup.find("year")
                year = year_tag.text if year_tag else "Unknown"

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # Delay between requests to avoid NCBI rate limiting
                time.sleep(0.5)
            except Exception:
                continue

        return json.dumps(articles, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {e}"}])
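
# Example of calling pmc_search directly (illustrative; the query is
# arbitrary and the output shape follows the dicts built above):
#
#   articles = json.loads(pmc_search("CRISPR gene therapy"))
#   for article in articles[:3]:
#       print(article.get("year"), "-", article.get("title"))
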
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature"""
    try:
        # Query Europe PMC and Semantic Scholar (not Google Scholar itself,
        # which offers no public API)
        encoded_query = quote(query)
        search_urls = [
            f"https://europepmc.org/webservices/rest/search?query={encoded_query}&format=json&pageSize=20",
            f"https://api.semanticscholar.org/graph/v1/paper/search?query={encoded_query}&limit=20&fields=title,abstract,year,authors,venue"
        ]

        results = []
        for url in search_urls:
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                response = requests.get(url, headers=headers, timeout=30)
                if not response.ok:
                    continue
                data = response.json()

                # Handle Europe PMC response ("or" fallbacks cover null JSON values)
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title") or "No title"),
                            "authors": [clean_text(author.get("fullName") or "Unknown")
                                        for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear") or "Unknown",
                            "journal": clean_text(result.get("journalTitle") or "Unknown Journal"),
                            "abstract": clean_text(result.get("abstractText") or "No abstract"),
                            "source": "Europe PMC"
                        }
                        # Keep only results with a real title and abstract
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Handle Semantic Scholar response
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title") or "No title"),
                            "authors": [clean_text(author.get("name") or "Unknown")
                                        for author in paper.get("authors", [])],
                            "year": paper.get("year") or "Unknown",
                            "journal": clean_text(paper.get("venue") or "Unknown Journal"),
                            "abstract": clean_text(paper.get("abstract") or "No abstract"),
                            "source": "Semantic Scholar"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting between providers
            except Exception:
                continue

        return json.dumps(results, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {e}"}])
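
# The two search tools complement each other; a sketch of merging their JSON
# output and de-duplicating by title (illustrative, hypothetical names):
#
#   q = "deep brain stimulation"
#   combined = json.loads(pmc_search(q)) + json.loads(google_scholar_search(q))
#   by_title = {a["title"].lower(): a for a in combined if "error" not in a}
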
def today_tool() -> str:
    """Get today's date"""
    return str(datetime.date.today())
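
# --- Usage sketch ---
# A minimal, illustrative example of registering the functions above as
# LangChain tools, which is presumably why `tool` is imported. Handing the
# tools to an agent depends on the LangChain version and LLM provider in
# use, so only tool construction is shown here.
if __name__ == "__main__":
    search_tools = [tool(pmc_search), tool(google_scholar_search), tool(today_tool)]
    for t in search_tools:
        print(f"{t.name}: {t.description}")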