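"""LangChain tools for literature search: PubMed Central via the NCBI
E-utilities, Google Scholar via the scholarly package, and a small helper
that returns today's date."""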
from langchain.tools import tool
from scholarly import scholarly, ProxyGenerator
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
# Configure scholarly with proxy to avoid blocking
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg)

@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    try:
        # Base URLs for the NCBI E-utilities (ESearch and EFetch)
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        # Search parameters
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get article IDs
        response = requests.get(search_url, params=search_params)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fall back to XML parsing if the JSON payload cannot be decoded
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_tag.text for id_tag in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch article details
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params)
                if not article_response.ok:
                    continue
                article_soup = BeautifulSoup(article_response.text, 'xml')

                # Extract article data
                title_elem = article_soup.find("article-title")
                title = title_elem.text if title_elem else "No title"

                abstract_elem = article_soup.find("abstract")
                abstract = abstract_elem.text if abstract_elem else "No abstract"

                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(author_name)

                year_elem = article_soup.find(["pub-date", "year"])
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = journal_elem.text if journal_elem else "Unknown Journal"

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # Add delay to avoid rate limiting
                time.sleep(0.5)
            except Exception:
                # Skip articles whose records cannot be fetched or parsed
                continue

        return json.dumps(articles, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])

@tool
def google_scholar_search(query: str) -> str:
    """Search Google Scholar for articles"""
    try:
        # Configure proxy and retry mechanism
        if not scholarly.use_proxy(pg):
            pg.FreeProxies()
            scholarly.use_proxy(pg)

        search_query = scholarly.search_pubs(query)
        results = []
        count = 0
        max_retries = 3

        while count < 20:
            try:
                result = next(search_query)

                # Extract publication data
                pub = {
                    "title": result.bib.get('title', 'No title'),
                    "authors": result.bib.get('author', 'No author').split(" and "),
                    "year": result.bib.get('year', 'No year'),
                    "abstract": result.bib.get('abstract', 'No abstract'),
                    "journal": result.bib.get('journal', result.bib.get('venue', 'No venue')),
                    "citations": result.citedby if hasattr(result, 'citedby') else 0
                }

                # Skip results that lack a title or an abstract
                if pub["title"] == 'No title' or pub["abstract"] == 'No abstract':
                    continue

                results.append(pub)
                count += 1

                # Add delay to avoid rate limiting
                time.sleep(0.5)
            except StopIteration:
                break
            except Exception:
                # Retry a few times on transient errors, then give up
                if max_retries > 0:
                    max_retries -= 1
                    time.sleep(1)
                    continue
                else:
                    break

        return json.dumps(results, indent=2)
    except Exception as e:
        return json.dumps([{"error": f"Google Scholar search failed: {str(e)}"}])

@tool
def today_tool() -> str:
    """Get today's date"""
    return str(datetime.date.today())
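

if __name__ == "__main__":
    # Minimal smoke test (a sketch, not part of the original file): the @tool
    # decorator wraps each function as a LangChain Tool object, and it is
    # assumed here that the Runnable-style .invoke() is available; on older
    # langchain versions, .run() may be needed instead. The query strings are
    # arbitrary example inputs.
    print(today_tool.invoke({}))
    print(pmc_search.invoke("intraocular pressure"))
    print(google_scholar_search.invoke("glaucoma treatment"))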