Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
tasks.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from crewai import Task
|
2 |
+
|
3 |
+
def get_research_task(agent, topic):
    """Build the literature-research Task for the given agent and topic.

    The description walks the agent through a PMC + Google Scholar search
    workflow and mandates APA-formatted references in the output.
    """
    research_description = f"""Research the topic "{topic}" extensively using PMC and Google Scholar.

Follow these steps:
1. First search PMC using the pmc_search tool
2. Then search Google Scholar using the google_scholar_search tool
3. For each source, carefully extract and analyze:
   - Title
   - Authors (full names for APA citation)
   - Publication year
   - Journal name (for APA citation)
   - Volume/Issue (if available)
   - DOI or URL (if available)
   - Key findings from abstract
   - Methodology used
   - Outcomes reported
4. Combine and synthesize the information:
   - Identify common themes
   - Note conflicting findings
   - Highlight strongest evidence
5. Organize findings by:
   - Study type (RCT, cohort, etc.)
   - Sample size
   - Follow-up duration
   - Outcome measures

If search results are limited:
1. Try alternative search terms
2. Broaden the search scope
3. Include related conditions/treatments

Output Format:
1. Summary of search strategy
2. Overview of findings
3. Detailed analysis of key studies
4. Evidence synthesis
5. Knowledge gaps identified
6. Complete reference list in APA format for ALL sources
   Format: Author(s) (Year). Title. Journal, Volume(Issue), Pages. DOI/URL
   Example: Smith, J. D., & Jones, M. R. (2023). Study findings. Journal of Medicine, 45(2), 123-145. https://doi.org/10.1000/med.2023"""

    return Task(
        description=research_description,
        agent=agent,
        expected_output="A comprehensive research summary with detailed analysis and APA-formatted references for all sources.",
    )
|
47 |
+
|
48 |
+
def get_writing_task(agent, topic):
    """Build the paper-writing Task for the given agent and topic.

    The description fixes the paper's section structure, per-section word
    counts, citation density requirements, and APA reference formatting.
    """
    writing_description = f"""Write a comprehensive scientific paper about "{topic}" based on the research provided.

Paper Structure:
1. Introduction (500-750 words):
   - Current state of knowledge
   - Clinical significance
   - Research gaps
   - Clear objectives
   - Include at least 5 in-text citations

2. Materials and Methods (500-750 words):
   - Search strategy
   - Inclusion criteria
   - Data extraction process
   - Quality assessment
   - Include methodology citations

3. Results (MINIMUM 1000 words):
   - Study characteristics
   - Patient demographics
   - Primary outcomes
   - Secondary outcomes
   - Safety data
   - Meta-analysis if applicable
   - Include specific data from at least 10 studies
   - Use in-text citations for every finding

4. Discussion (MINIMUM 1000 words):
   - Summary of main findings
   - Comparison with existing literature
   - Clinical implications
   - Strengths and limitations
   - Future research directions
   - Include at least 15 in-text citations
   - Compare and contrast findings from different studies
   - Address conflicting results
   - Discuss potential mechanisms

5. Conclusion (250-500 words):
   - Key takeaways
   - Clinical recommendations
   - Research priorities

6. References:
   - MUST list ALL cited articles (minimum 20)
   - Use proper APA format
   - Include DOI/URL when available
   - Alphabetical order

Writing Guidelines:
- Use clear, scientific language
- Support EVERY claim with citations
- Include relevant statistics
- Maintain objectivity
- Address conflicting findings
- Consider clinical relevance

Citation Format:
- In-text: (Author et al., Year) or Author et al. (Year)
- For 2 authors: (Smith & Jones, 2023)
- For 3+ authors: (Smith et al., 2023)
- Multiple citations: (Smith et al., 2023; Jones et al., 2022)

IMPORTANT:
- Results and Discussion MUST be at least 1000 words each
- EVERY paragraph should include at least one citation
- ALL sources from the research must be cited and listed
- Check that all in-text citations have corresponding references"""

    return Task(
        description=writing_description,
        agent=agent,
        expected_output="A well-structured scientific paper with comprehensive analysis, proper citations, and complete reference list.",
    )
|
tools.py
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.tools import tool
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import requests
|
4 |
+
import datetime
|
5 |
+
import json
|
6 |
+
import time
|
7 |
+
import re
|
8 |
+
|
9 |
+
def clean_text(text):
    """Strip HTML tags and collapse whitespace runs to single spaces.

    Falsy input (None, empty string) yields an empty string.
    """
    if not text:
        return ""
    # Drop anything that looks like a markup tag, then normalize whitespace.
    without_tags = re.sub(r'<[^>]+>', '', text)
    collapsed = re.sub(r'\s+', ' ', without_tags)
    return collapsed.strip()
|
16 |
+
|
17 |
+
@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    # Returns a JSON array (as a string) of article metadata dicts, or a
    # one-element array with an "error" key on total failure.
    try:
        # NCBI E-utilities endpoints: esearch returns matching IDs,
        # efetch returns full metadata for a given ID.
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get article IDs. The timeout keeps the agent from hanging
        # indefinitely when NCBI is slow or unreachable.
        response = requests.get(search_url, params=search_params, timeout=30)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fallback to XML parsing if the body is not valid JSON
            # (requests raises a ValueError subclass on bad JSON).
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id.text for id in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch full article metadata as XML.
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(
                    fetch_url, params=fetch_params, timeout=30
                )
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(
                    abstract_elem.text if abstract_elem else "No abstract"
                )

                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                # Publication year: the first match may be a <pub-date>
                # wrapper (look for a nested <year>) or a bare <year>
                # element (use its own text) — previously a bare <year>
                # was wrongly reported as "Unknown".
                year = "Unknown"
                year_elem = article_soup.find(["pub-date", "year"])
                if year_elem is not None:
                    if year_elem.name == "year":
                        year = year_elem.text
                    else:
                        nested_year = year_elem.find("year")
                        if nested_year is not None:
                            year = nested_year.text

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(
                    journal_elem.text if journal_elem else "Unknown Journal"
                )

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # NCBI rate-limits anonymous clients (~3 req/s).
                time.sleep(0.5)

            except Exception:
                # A single unfetchable/unparseable record must not abort
                # the whole search; skip it and keep going.
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
|
104 |
+
|
105 |
+
@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature"""
    # Queries Europe PMC and Semantic Scholar; returns a JSON array (as a
    # string) of publication dicts, or a one-element array with "error".
    try:
        # (endpoint, query parameters) pairs. Passing the query through
        # `params` lets requests URL-encode it, so terms containing
        # spaces, '&' or '#' no longer corrupt the request URL.
        searches = [
            ("https://europepmc.org/webservices/rest/search",
             {"query": query, "format": "json", "pageSize": 20}),
            ("https://api.semanticscholar.org/graph/v1/paper/search",
             {"query": query, "limit": 20,
              "fields": "title,abstract,year,authors,venue"}),
        ]

        results = []

        for url, params in searches:
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                # Timeout keeps the agent from hanging on a dead endpoint.
                response = requests.get(url, params=params, headers=headers, timeout=30)

                if not response.ok:
                    continue

                data = response.json()

                # NOTE: `.get(key) or default` (rather than
                # `.get(key, default)`) also replaces JSON null values,
                # which both APIs return for missing fields — otherwise
                # the "No title"/"No abstract" filters below never fire.

                # Handle Europe PMC response
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title") or "No title"),
                            "authors": [clean_text(author.get("fullName") or "Unknown") for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear") or "Unknown",
                            "journal": clean_text(result.get("journalTitle") or "Unknown Journal"),
                            "abstract": clean_text(result.get("abstractText") or "No abstract"),
                            "source": "Europe PMC"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Handle Semantic Scholar response
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title") or "No title"),
                            "authors": [clean_text(author.get("name") or "Unknown") for author in paper.get("authors", [])],
                            "year": paper.get("year") or "Unknown",
                            "journal": clean_text(paper.get("venue") or "Unknown Journal"),
                            "abstract": clean_text(paper.get("abstract") or "No abstract"),
                            "source": "Semantic Scholar"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting

            except Exception:
                # Best-effort aggregation: a failing source is skipped so
                # the other endpoint can still contribute results.
                continue

        return json.dumps(results, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])
|
166 |
+
|
167 |
+
@tool
def today_tool() -> str:
    """Get today's date"""
    # ISO-8601 (YYYY-MM-DD) — identical to str() applied to a date object.
    return datetime.date.today().isoformat()
|