ferferefer committed on
Commit aaa7944 · verified · 1 Parent(s): 8b9e744

Upload 2 files

Files changed (2)
  1. tasks.py +120 -0
  2. tools.py +170 -0
tasks.py ADDED
@@ -0,0 +1,120 @@
+ from crewai import Task
+
+ def get_research_task(agent, topic):
+     return Task(
+         description=f"""Research the topic "{topic}" extensively using PMC and Google Scholar.
+
+         Follow these steps:
+         1. First search PMC using the pmc_search tool
+         2. Then search Google Scholar using the google_scholar_search tool
+         3. For each source, carefully extract and analyze:
+            - Title
+            - Authors (full names for APA citation)
+            - Publication year
+            - Journal name (for APA citation)
+            - Volume/Issue (if available)
+            - DOI or URL (if available)
+            - Key findings from abstract
+            - Methodology used
+            - Outcomes reported
+         4. Combine and synthesize the information:
+            - Identify common themes
+            - Note conflicting findings
+            - Highlight strongest evidence
+         5. Organize findings by:
+            - Study type (RCT, cohort, etc.)
+            - Sample size
+            - Follow-up duration
+            - Outcome measures
+
+         If search results are limited:
+         1. Try alternative search terms
+         2. Broaden the search scope
+         3. Include related conditions/treatments
+
+         Output Format:
+         1. Summary of search strategy
+         2. Overview of findings
+         3. Detailed analysis of key studies
+         4. Evidence synthesis
+         5. Knowledge gaps identified
+         6. Complete reference list in APA format for ALL sources
+            Format: Author(s) (Year). Title. Journal, Volume(Issue), Pages. DOI/URL
+            Example: Smith, J. D., & Jones, M. R. (2023). Study findings. Journal of Medicine, 45(2), 123-145. https://doi.org/10.1000/med.2023""",
+         agent=agent,
+         expected_output="A comprehensive research summary with detailed analysis and APA-formatted references for all sources."
+     )
+
+ def get_writing_task(agent, topic):
+     return Task(
+         description=f"""Write a comprehensive scientific paper about "{topic}" based on the research provided.
+
+         Paper Structure:
+         1. Introduction (500-750 words):
+            - Current state of knowledge
+            - Clinical significance
+            - Research gaps
+            - Clear objectives
+            - Include at least 5 in-text citations
+
+         2. Materials and Methods (500-750 words):
+            - Search strategy
+            - Inclusion criteria
+            - Data extraction process
+            - Quality assessment
+            - Include methodology citations
+
+         3. Results (MINIMUM 1000 words):
+            - Study characteristics
+            - Patient demographics
+            - Primary outcomes
+            - Secondary outcomes
+            - Safety data
+            - Meta-analysis if applicable
+            - Include specific data from at least 10 studies
+            - Use in-text citations for every finding
+
+         4. Discussion (MINIMUM 1000 words):
+            - Summary of main findings
+            - Comparison with existing literature
+            - Clinical implications
+            - Strengths and limitations
+            - Future research directions
+            - Include at least 15 in-text citations
+            - Compare and contrast findings from different studies
+            - Address conflicting results
+            - Discuss potential mechanisms
+
+         5. Conclusion (250-500 words):
+            - Key takeaways
+            - Clinical recommendations
+            - Research priorities
+
+         6. References:
+            - MUST list ALL cited articles (minimum 20)
+            - Use proper APA format
+            - Include DOI/URL when available
+            - Alphabetical order
+
+         Writing Guidelines:
+         - Use clear, scientific language
+         - Support EVERY claim with citations
+         - Include relevant statistics
+         - Maintain objectivity
+         - Address conflicting findings
+         - Consider clinical relevance
+
+         Citation Format:
+         - In-text: (Author et al., Year) or Author et al. (Year)
+         - For 2 authors: (Smith & Jones, 2023)
+         - For 3+ authors: (Smith et al., 2023)
+         - Multiple citations: (Smith et al., 2023; Jones et al., 2022)
+
+         IMPORTANT:
+         - Results and Discussion MUST be at least 1000 words each
+         - EVERY paragraph should include at least one citation
+         - ALL sources from the research must be cited and listed
+         - Check that all in-text citations have corresponding references""",
+         agent=agent,
+         expected_output="A well-structured scientific paper with comprehensive analysis, proper citations, and complete reference list."
+     )
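
Usage sketch (illustrative, not part of this commit): a minimal example of how one of these task factories might be called. The Agent fields and topic string are placeholders, and the tool objects are the ones defined in tools.py below.

from crewai import Agent
from tasks import get_research_task
from tools import pmc_search, google_scholar_search  # defined in tools.py below

# Hypothetical agent; role, goal, and backstory are placeholders.
researcher = Agent(
    role="Medical Researcher",
    goal="Find and summarize peer-reviewed evidence",
    backstory="An evidence-based medicine specialist.",
    tools=[pmc_search, google_scholar_search],
)

# The topic is interpolated into the task prompt via the f-string.
task = get_research_task(researcher, "semaglutide for weight management")
print(task.description[:200])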
tools.py ADDED
@@ -0,0 +1,170 @@
+ from langchain.tools import tool
+ from bs4 import BeautifulSoup
+ import requests
+ import datetime
+ import json
+ import time
+ import re
+
+ def clean_text(text):
+     """Clean text from HTML tags and extra whitespace"""
+     if not text:
+         return ""
+     text = re.sub(r'<[^>]+>', '', text)
+     text = re.sub(r'\s+', ' ', text)
+     return text.strip()
+
+ @tool
+ def pmc_search(query: str) -> str:
+     """Search PubMed Central (PMC) for articles"""
+     try:
+         # Base URLs for the NCBI E-utilities APIs
+         search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+         fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+
+         # Search parameters
+         search_params = {
+             "db": "pmc",
+             "term": query,
+             "retmax": 20,
+             "retmode": "json",
+             "sort": "relevance"
+         }
+
+         # Get article IDs
+         response = requests.get(search_url, params=search_params)
+         if not response.ok:
+             return json.dumps([{"error": "PubMed search failed"}])
+
+         try:
+             search_data = response.json()
+             article_ids = search_data.get("esearchresult", {}).get("idlist", [])
+         except Exception:
+             # Fall back to XML parsing if the JSON payload is malformed
+             soup = BeautifulSoup(response.text, 'xml')
+             article_ids = [id.text for id in soup.find_all('Id')]
+
+         articles = []
+         for pmid in article_ids:
+             try:
+                 # Fetch article details
+                 fetch_params = {
+                     "db": "pmc",
+                     "id": pmid,
+                     "retmode": "xml"
+                 }
+                 article_response = requests.get(fetch_url, params=fetch_params)
+                 if not article_response.ok:
+                     continue
+
+                 article_soup = BeautifulSoup(article_response.text, 'xml')
+
+                 # Extract article data
+                 title_elem = article_soup.find("article-title")
+                 title = clean_text(title_elem.text if title_elem else "No title")
+
+                 abstract_elem = article_soup.find("abstract")
+                 abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")
+
+                 authors = []
+                 for author in article_soup.find_all(["author", "contrib"]):
+                     surname = author.find(["surname", "last-name"])
+                     given_name = author.find(["given-names", "first-name"])
+                     if surname:
+                         author_name = surname.text
+                         if given_name:
+                             author_name = f"{given_name.text} {author_name}"
+                         authors.append(clean_text(author_name))
+
+                 year_elem = article_soup.find(["pub-date", "year"])
+                 year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"
+
+                 journal_elem = article_soup.find(["journal-title", "source"])
+                 journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")
+
+                 articles.append({
+                     "id": pmid,
+                     "title": title,
+                     "authors": authors,
+                     "year": year,
+                     "journal": journal,
+                     "abstract": abstract
+                 })
+
+                 # Add delay to avoid rate limiting
+                 time.sleep(0.5)
+
+             except Exception:
+                 continue
+
+         return json.dumps(articles, indent=2)
+
+     except Exception as e:
+         return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
+
+ @tool
+ def google_scholar_search(query: str) -> str:
+     """Search alternative sources for medical literature"""
+     try:
+         # Use alternative medical literature sources
+         search_urls = [
+             f"https://europepmc.org/webservices/rest/search?query={query}&format=json&pageSize=20",
+             f"https://api.semanticscholar.org/graph/v1/paper/search?query={query}&limit=20&fields=title,abstract,year,authors,venue"
+         ]
+
+         results = []
+
+         for url in search_urls:
+             try:
+                 headers = {
+                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+                 }
+                 response = requests.get(url, headers=headers)
+
+                 if not response.ok:
+                     continue
+
+                 data = response.json()
+
+                 # Handle Europe PMC response
+                 if "resultList" in data:
+                     for result in data["resultList"].get("result", []):
+                         pub = {
+                             "title": clean_text(result.get("title", "No title")),
+                             "authors": [clean_text(author.get("fullName", "Unknown")) for author in result.get("authorList", {}).get("author", [])],
+                             "year": result.get("pubYear", "Unknown"),
+                             "journal": clean_text(result.get("journalTitle", "Unknown Journal")),
+                             "abstract": clean_text(result.get("abstractText", "No abstract")),
+                             "source": "Europe PMC"
+                         }
+                         if pub["title"] != "No title" and pub["abstract"] != "No abstract":
+                             results.append(pub)
+
+                 # Handle Semantic Scholar response
+                 elif "data" in data:
+                     for paper in data["data"]:
+                         pub = {
+                             "title": clean_text(paper.get("title", "No title")),
+                             "authors": [clean_text(author.get("name", "Unknown")) for author in paper.get("authors", [])],
+                             "year": paper.get("year", "Unknown"),
+                             "journal": clean_text(paper.get("venue", "Unknown Journal")),
+                             "abstract": clean_text(paper.get("abstract", "No abstract")),
+                             "source": "Semantic Scholar"
+                         }
+                         if pub["title"] != "No title" and pub["abstract"] != "No abstract":
+                             results.append(pub)
+
+                 time.sleep(1)  # Rate limiting
+
+             except Exception:
+                 continue
+
+         return json.dumps(results, indent=2)
+
+     except Exception as e:
+         return json.dumps([{"error": f"Literature search failed: {str(e)}"}])
+
+ @tool
+ def today_tool() -> str:
+     """Get today's date"""
+     return str(datetime.date.today())
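
Usage sketch (illustrative, not part of this commit): how the two files might be wired together into a crewai pipeline. The agent roles, goals, backstories, and the topic below are placeholders; only get_research_task, get_writing_task, and the three tools come from this commit.

from crewai import Agent, Crew

from tasks import get_research_task, get_writing_task
from tools import pmc_search, google_scholar_search, today_tool

topic = "semaglutide for weight management"  # placeholder topic

# Hypothetical agents; their configuration is not defined in this commit.
researcher = Agent(
    role="Medical Researcher",
    goal=f"Gather peer-reviewed evidence on {topic}",
    backstory="An evidence-based medicine specialist.",
    tools=[pmc_search, google_scholar_search, today_tool],
    verbose=True,
)
writer = Agent(
    role="Scientific Writer",
    goal=f"Write a fully referenced paper on {topic}",
    backstory="An experienced medical writer.",
    verbose=True,
)

crew = Crew(
    agents=[researcher, writer],
    tasks=[
        get_research_task(researcher, topic),
        get_writing_task(writer, topic),
    ],
    verbose=True,
)

print(crew.kickoff())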