ferferefer committed
Commit 243a67a · verified · 1 parent: b84600e

Upload 6 files

Files changed (6):
  1. agents.py +41 -0
  2. app.py +84 -0
  3. crew.py +22 -0
  4. requirements.txt +8 -0
  5. tasks.py +92 -0
  6. tools.py +158 -0
agents.py ADDED
@@ -0,0 +1,41 @@
+ from crewai import Agent
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from tools import pmc_search, google_scholar_search, today_tool
+ import os
+
+ def get_gemini_llm():
+     return ChatGoogleGenerativeAI(
+         model="gemini-pro",
+         google_api_key=os.getenv('GEMINI_API_KEY'),
+         temperature=0.5,
+         convert_system_message_to_human=True,
+         top_p=0.8,
+         top_k=40,
+         max_output_tokens=2048
+     )
+
+ def get_researcher_agent(verbose):
+     return Agent(
+         role="Medical Research Scientist",
+         goal="Conduct a thorough scientific literature review and synthesize the findings into a comprehensive research summary.",
+         backstory="""You are an expert medical research scientist with extensive experience in systematic reviews and meta-analyses.
+         You specialize in analyzing clinical studies, understanding research methodologies, and synthesizing evidence from multiple sources.
+         Your expertise includes critical appraisal of medical literature, statistical analysis, and identification of key clinical findings.""",
+         llm=get_gemini_llm(),
+         tools=[pmc_search, google_scholar_search],
+         allow_delegation=False,
+         verbose=verbose
+     )
+
+ def get_writer_agent(verbose):
+     return Agent(
+         role="Medical Writer",
+         goal="Transform research findings into a well-structured, comprehensive scientific paper that follows academic standards.",
+         backstory="""You are an experienced medical writer with expertise in creating high-quality scientific manuscripts.
+         You excel at organizing complex medical information, maintaining scientific accuracy, and following academic writing standards.
+         Your writing is clear, precise, and always supported by evidence from the literature.""",
+         llm=get_gemini_llm(),
+         tools=[today_tool],
+         allow_delegation=False,
+         verbose=verbose
+     )
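Before wiring the agents into a crew, the shared LLM factory can be smoke-tested on its own. A minimal sketch, not part of the commit; it assumes GEMINI_API_KEY is already set in the environment:

import os
assert os.getenv("GEMINI_API_KEY"), "set GEMINI_API_KEY first"  # hypothetical guard

from agents import get_gemini_llm

llm = get_gemini_llm()
# Any LangChain chat model accepts a plain string via .invoke()
print(llm.invoke("Reply with one word: ready").content)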
app.py ADDED
@@ -0,0 +1,84 @@
+ import gradio as gr
+ import os
+ from dotenv import load_dotenv
+ from crew import get_crew
+ import threading
+
+ # Load environment variables
+ load_dotenv()
+
+ # Global lock so only one paper is generated at a time
+ lock = threading.Lock()
+
+ VERBOSE = False
+
+ def invoke(topic):
+     """Generate a scientific paper for the given topic."""
+     if not topic:
+         raise gr.Error("Topic is required.")
+
+     with lock:
+         # Build the crew and run the research and writing pipeline
+         crew = get_crew(verbose=VERBOSE, topic=topic)
+         paper = str(crew.kickoff())
+         return paper
+
+ # Create the Gradio interface
+ css = """
+ .gradio-container {
+     font-family: 'Arial', sans-serif;
+ }
+ .paper-output {
+     font-size: 16px;
+     line-height: 1.6;
+     padding: 20px;
+     background: #f9f9f9;
+     border-radius: 10px;
+     box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+ }
+ """
+
+ demo = gr.Interface(
+     fn=invoke,
+     inputs=[
+         gr.Textbox(
+             label="Research Topic",
+             placeholder="Enter your research topic...",
+             lines=2
+         )
+     ],
+     outputs=[
+         gr.Markdown(
+             label="Generated Scientific Paper",
+             elem_classes="paper-output"
+         )
+     ],
+     title="AI Scientific Paper Generator",
+     description="""This application uses AI agents to generate comprehensive scientific papers.
+     The first agent researches your topic using PubMed Central and Google Scholar,
+     collecting at least 20 relevant articles. The second agent then synthesizes this
+     research into a well-structured scientific paper with proper citations.
+
+     Created by Dr. Fernando Ly""",
+     article="""### How it works
+     1. Enter your research topic
+     2. The Research Agent collects relevant scientific articles
+     3. The Writing Agent generates a structured paper with:
+         - Introduction
+         - Materials and Methods
+         - Results
+         - Discussion
+         - Conclusion
+         - References (APA format)
+
+     The paper will include proper citations and be based on real scientific literature.""",
+     css=css,
+     theme=gr.themes.Soft(
+         primary_hue="blue",
+         secondary_hue="gray"
+     )
+ )
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
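Since invoke() serializes all requests behind a single global lock, one long generation blocks every other user. Gradio's request queue is a common alternative; a sketch of how the launch could be adapted (the queue size and server binding are illustrative, not part of the commit):

# Alternative: let Gradio queue concurrent requests instead of blocking on the lock.
demo.queue(max_size=8)              # hypothetical queue size
demo.launch(server_name="0.0.0.0")  # e.g. when serving from a container or Space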
crew.py ADDED
@@ -0,0 +1,22 @@
+ from crewai import Crew, Process
+ from agents import get_researcher_agent, get_writer_agent
+ from tasks import get_research_task, get_writing_task
+
+ def get_crew(verbose=False, topic=None):
+     # Create agents
+     researcher = get_researcher_agent(verbose)
+     writer = get_writer_agent(verbose)
+
+     # Create tasks
+     research_task = get_research_task(researcher, topic)
+     writing_task = get_writing_task(writer, topic)
+
+     # Create the crew; tasks run sequentially, so the writer builds on the researcher's output
+     crew = Crew(
+         agents=[researcher, writer],
+         tasks=[research_task, writing_task],
+         verbose=verbose,
+         process=Process.sequential
+     )
+
+     return crew
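For reference, this is how the factory is consumed end to end; app.py does the same thing behind its Gradio handler (the topic string here is illustrative):

from crew import get_crew

crew = get_crew(verbose=True, topic="SGLT2 inhibitors in heart failure")
paper = str(crew.kickoff())  # runs the research task, then the writing task
print(paper)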
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ crewai==0.11.0
+ langchain-google-genai==0.0.6
+ google-generativeai==0.3.2
+ scholarly==1.7.11
+ biopython==1.83
+ beautifulsoup4==4.12.2
+ python-dotenv==1.0.0
+ gradio==4.8.0
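Alongside these dependencies, a Gemini key is required: agents.py reads it via os.getenv('GEMINI_API_KEY') and app.py calls load_dotenv(), so a local .env file of the following shape is expected (the value is a placeholder):

GEMINI_API_KEY=your-api-key-here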
tasks.py ADDED
@@ -0,0 +1,92 @@
+ from crewai import Task
+
+ def get_research_task(agent, topic):
+     return Task(
+         description=f"""Research the topic "{topic}" extensively using PMC and Google Scholar.
+
+         Follow these steps:
+         1. First search PMC using the pmc_search tool
+         2. Then search Google Scholar using the google_scholar_search tool
+         3. For each source, carefully extract and analyze:
+            - Title
+            - Authors
+            - Publication year
+            - Key findings from the abstract
+            - Methodology used
+            - Outcomes reported
+         4. Combine and synthesize the information:
+            - Identify common themes
+            - Note conflicting findings
+            - Highlight the strongest evidence
+         5. Organize findings by:
+            - Study type (RCT, cohort, etc.)
+            - Sample size
+            - Follow-up duration
+            - Outcome measures
+
+         If search results are limited:
+         1. Try alternative search terms
+         2. Broaden the search scope
+         3. Include related conditions/treatments
+
+         Output Format:
+         1. Summary of search strategy
+         2. Overview of findings
+         3. Detailed analysis of key studies
+         4. Evidence synthesis
+         5. Knowledge gaps identified""",
+         agent=agent,
+         expected_output="A comprehensive research summary with detailed analysis of findings from both PMC and Google Scholar."
+     )
+
+ def get_writing_task(agent, topic):
+     return Task(
+         description=f"""Write a comprehensive scientific paper about "{topic}" based on the research provided.
+
+         Paper Structure:
+         1. Introduction:
+            - Current state of knowledge
+            - Clinical significance
+            - Research gaps
+            - Clear objectives
+
+         2. Materials and Methods:
+            - Search strategy
+            - Inclusion criteria
+            - Data extraction process
+            - Quality assessment
+
+         3. Results (minimum one page):
+            - Study characteristics
+            - Patient demographics
+            - Primary outcomes
+            - Secondary outcomes
+            - Safety data
+            - Meta-analysis if applicable
+
+         4. Discussion (minimum one page):
+            - Summary of main findings
+            - Comparison with existing literature
+            - Clinical implications
+            - Strengths and limitations
+            - Future research directions
+
+         5. Conclusion:
+            - Key takeaways
+            - Clinical recommendations
+            - Research priorities
+
+         6. References:
+            - APA format
+            - All cited studies included
+
+         Writing Guidelines:
+         - Use clear, scientific language
+         - Support all claims with evidence
+         - Include relevant statistics
+         - Maintain objectivity
+         - Address conflicting findings
+         - Consider clinical relevance""",
+         agent=agent,
+         expected_output="A well-structured scientific paper with comprehensive analysis and proper citations."
+     )
tools.py ADDED
@@ -0,0 +1,158 @@
+ from langchain.tools import tool
+ from scholarly import scholarly, ProxyGenerator
+ from bs4 import BeautifulSoup
+ import requests
+ import datetime
+ import json
+ import time
+
+ # Configure scholarly with free proxies to reduce the chance of being blocked
+ pg = ProxyGenerator()
+ pg.FreeProxies()
+ scholarly.use_proxy(pg)
+
+ @tool
+ def pmc_search(query: str) -> str:
+     """Search PubMed Central (PMC) for articles"""
+     try:
+         # NCBI E-utilities endpoints
+         search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+         fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+
+         # Search parameters
+         search_params = {
+             "db": "pmc",
+             "term": query,
+             "retmax": 20,
+             "retmode": "json",
+             "sort": "relevance"
+         }
+
+         # Get article IDs
+         response = requests.get(search_url, params=search_params)
+         if not response.ok:
+             return json.dumps([{"error": "PubMed search failed"}])
+
+         try:
+             search_data = response.json()
+             article_ids = search_data.get("esearchresult", {}).get("idlist", [])
+         except ValueError:
+             # Fall back to XML parsing if the JSON payload is malformed
+             soup = BeautifulSoup(response.text, 'xml')
+             article_ids = [id_tag.text for id_tag in soup.find_all('Id')]
+
+         articles = []
+         for pmid in article_ids:
+             try:
+                 # Fetch article details
+                 fetch_params = {
+                     "db": "pmc",
+                     "id": pmid,
+                     "retmode": "xml"
+                 }
+                 article_response = requests.get(fetch_url, params=fetch_params)
+                 if not article_response.ok:
+                     continue
+
+                 article_soup = BeautifulSoup(article_response.text, 'xml')
+
+                 # Extract article data
+                 title_elem = article_soup.find("article-title")
+                 title = title_elem.text if title_elem else "No title"
+
+                 abstract_elem = article_soup.find("abstract")
+                 abstract = abstract_elem.text if abstract_elem else "No abstract"
+
+                 authors = []
+                 for author in article_soup.find_all(["author", "contrib"]):
+                     surname = author.find(["surname", "last-name"])
+                     given_name = author.find(["given-names", "first-name"])
+                     if surname:
+                         author_name = surname.text
+                         if given_name:
+                             author_name = f"{given_name.text} {author_name}"
+                         authors.append(author_name)
+
+                 year_elem = article_soup.find("pub-date") or article_soup.find("year")
+                 year = (year_elem.find("year") or year_elem).text if year_elem else "Unknown"
+
+                 journal_elem = article_soup.find(["journal-title", "source"])
+                 journal = journal_elem.text if journal_elem else "Unknown Journal"
+
+                 articles.append({
+                     "id": pmid,
+                     "title": title,
+                     "authors": authors,
+                     "year": year,
+                     "journal": journal,
+                     "abstract": abstract
+                 })
+
+                 # Add a delay to stay under NCBI rate limits
+                 time.sleep(0.5)
+
+             except Exception:
+                 continue
+
+         return json.dumps(articles, indent=2)
+
+     except Exception as e:
+         return json.dumps([{"error": f"PMC search failed: {str(e)}"}])
+
+ @tool
+ def google_scholar_search(query: str) -> str:
+     """Search Google Scholar for articles"""
+     try:
+         # Refresh the free-proxy pool before searching
+         pg.FreeProxies()
+         scholarly.use_proxy(pg)
+
+         search_query = scholarly.search_pubs(query)
+         results = []
+         count = 0
+         max_retries = 3
+
+         while count < 20:
+             try:
+                 result = next(search_query)
+
+                 # scholarly 1.x yields plain dicts; publication metadata lives under "bib"
+                 bib = result.get("bib", {})
+                 pub = {
+                     "title": bib.get("title", "No title"),
+                     "authors": bib.get("author", []) if isinstance(bib.get("author"), list) else str(bib.get("author", "No author")).split(" and "),
+                     "year": bib.get("pub_year", bib.get("year", "No year")),
+                     "abstract": bib.get("abstract", "No abstract"),
+                     "journal": bib.get("journal", bib.get("venue", "No venue")),
+                     "citations": result.get("num_citations", 0)
+                 }
+
+                 # Skip entries without a usable title or abstract
+                 if pub["title"] == "No title" or pub["abstract"] == "No abstract":
+                     continue
+
+                 results.append(pub)
+                 count += 1
+
+                 # Add delay to avoid rate limiting
+                 time.sleep(0.5)
+
+             except StopIteration:
+                 break
+             except Exception:
+                 if max_retries > 0:
+                     max_retries -= 1
+                     time.sleep(1)
+                     continue
+                 else:
+                     break
+
+         return json.dumps(results, indent=2)
+
+     except Exception as e:
+         return json.dumps([{"error": f"Google Scholar search failed: {str(e)}"}])
+
+ @tool
+ def today_tool() -> str:
+     """Get today's date"""
+     return str(datetime.date.today())
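pmc_search wraps NCBI's E-utilities in a two-step flow: esearch returns matching IDs, then efetch retrieves each record as XML. The flow can be exercised standalone against the same endpoints used above (the query term is illustrative):

import requests

# Step 1: esearch - get PMC IDs for a query (same params as pmc_search)
ids = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    params={"db": "pmc", "term": "sglt2 inhibitors heart failure", "retmax": 3, "retmode": "json"},
).json()["esearchresult"]["idlist"]

# Step 2: efetch - pull the full XML record for the first hit
xml = requests.get(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
    params={"db": "pmc", "id": ids[0], "retmode": "xml"},
).text

print(ids)
print(xml[:300])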