Upload 6 files
agents.py
ADDED
@@ -0,0 +1,41 @@
from crewai import Agent
from langchain_google_genai import ChatGoogleGenerativeAI
from tools import pmc_search, google_scholar_search, today_tool
import os

def get_gemini_llm():
    return ChatGoogleGenerativeAI(
        model="gemini-pro",
        google_api_key=os.getenv('GEMINI_API_KEY'),
        temperature=0.5,
        convert_system_message_to_human=True,
        top_p=0.8,
        top_k=40,
        max_output_tokens=2048
    )

def get_researcher_agent(verbose):
    return Agent(
        role="Medical Research Scientist",
        goal="""Conduct a thorough scientific literature review and synthesize the findings into a comprehensive research summary.""",
        backstory="""You are an expert medical research scientist with extensive experience in systematic reviews and meta-analyses.
        You specialize in analyzing clinical studies, understanding research methodologies, and synthesizing evidence from multiple sources.
        Your expertise includes critical appraisal of medical literature, statistical analysis, and identification of key clinical findings.""",
        llm=get_gemini_llm(),
        tools=[pmc_search, google_scholar_search],
        allow_delegation=False,
        verbose=verbose
    )

def get_writer_agent(verbose):
    return Agent(
        role="Medical Writer",
        goal="""Transform research findings into a well-structured, comprehensive scientific paper that follows academic standards.""",
        backstory="""You are an experienced medical writer with expertise in creating high-quality scientific manuscripts.
        You excel at organizing complex medical information, maintaining scientific accuracy, and following academic writing standards.
        Your writing is clear, precise, and always supported by evidence from the literature.""",
        llm=get_gemini_llm(),
        tools=[today_tool],
        allow_delegation=False,
        verbose=verbose
    )
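get_gemini_llm() reads GEMINI_API_KEY from the environment, so a bad or missing key only surfaces once an agent runs. A minimal smoke test, sketched under the assumption that the key is exported and that the pinned langchain-google-genai exposes the standard invoke() interface:

# Hypothetical smoke test, not part of this upload
import os
from agents import get_gemini_llm

assert os.getenv("GEMINI_API_KEY"), "export GEMINI_API_KEY first"
print(get_gemini_llm().invoke("Reply with OK.").content)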
app.py
ADDED
@@ -0,0 +1,84 @@
import gradio as gr
import os
from dotenv import load_dotenv
from crew import get_crew
import threading

# Load environment variables
load_dotenv()

# Global lock for thread safety
lock = threading.Lock()

VERBOSE = False

def invoke(topic):
    """Generate a scientific paper based on the given topic"""
    if not topic:
        raise gr.Error("Topic is required.")

    with lock:
        # Generate the paper
        crew = get_crew(verbose=VERBOSE, topic=topic)
        paper = str(crew.kickoff())
        return paper

# Create the Gradio interface
css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.paper-output {
    font-size: 16px;
    line-height: 1.6;
    padding: 20px;
    background: #f9f9f9;
    border-radius: 10px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
"""

demo = gr.Interface(
    fn=invoke,
    inputs=[
        gr.Textbox(
            label="Research Topic",
            placeholder="Enter your research topic...",
            lines=2
        )
    ],
    outputs=[
        gr.Markdown(
            label="Generated Scientific Paper",
            elem_classes="paper-output"
        )
    ],
    title="AI Scientific Paper Generator",
    description="""This application uses AI agents to generate comprehensive scientific papers.
    The first agent researches your topic using PubMed Central and Google Scholar,
    collecting at least 20 relevant articles. The second agent then synthesizes this
    research into a well-structured scientific paper with proper citations.

    Created by Dr. Fernando Ly""",
    article="""### How it works
    1. Enter your research topic
    2. The Research Agent will collect relevant scientific articles
    3. The Writing Agent will generate a structured paper with:
       - Introduction
       - Materials and Methods
       - Results
       - Discussion
       - Conclusion
       - References (APA format)

    The paper will include proper citations and be based on real scientific literature.""",
    css=css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="gray"
    )
)

# Launch the app
if __name__ == "__main__":
    demo.launch()
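Because load_dotenv() runs at import time, the app expects GEMINI_API_KEY in a local .env file or the shell environment before `python app.py` is run. A pre-launch check, sketched under that assumption:

# Hypothetical pre-launch check, not part of this upload
import os
from dotenv import load_dotenv

load_dotenv()
if not os.getenv("GEMINI_API_KEY"):
    raise SystemExit("GEMINI_API_KEY missing; add it to .env before launching app.py")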
crew.py
ADDED
@@ -0,0 +1,22 @@
from crewai import Crew, Process
from agents import get_researcher_agent, get_writer_agent
from tasks import get_research_task, get_writing_task

def get_crew(verbose=False, topic=None):
    # Create Agents
    researcher = get_researcher_agent(verbose)
    writer = get_writer_agent(verbose)

    # Create Tasks
    research_task = get_research_task(researcher, topic)
    writing_task = get_writing_task(writer, topic)

    # Create Crew
    crew = Crew(
        agents=[researcher, writer],
        tasks=[research_task, writing_task],
        verbose=verbose,
        process=Process.sequential
    )

    return crew
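The crew can also be driven without the Gradio UI, which is convenient when debugging agent output. A minimal sketch mirroring invoke() in app.py; the topic string is illustrative only:

# Hypothetical direct invocation, not part of this upload
from crew import get_crew

crew = get_crew(verbose=True, topic="metformin for type 2 diabetes")
print(crew.kickoff())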
requirements.txt
ADDED
@@ -0,0 +1,8 @@
crewai==0.11.0
langchain-google-genai==0.0.6
google-generativeai==0.3.2
scholarly==1.7.11
biopython==1.83
beautifulsoup4==4.12.2
python-dotenv==1.0.0
gradio==4.8.0
tasks.py
ADDED
@@ -0,0 +1,92 @@
from crewai import Task

def get_research_task(agent, topic):
    return Task(
        description=f"""Research the topic "{topic}" extensively using PMC and Google Scholar.

        Follow these steps:
        1. First search PMC using the pmc_search tool
        2. Then search Google Scholar using the google_scholar_search tool
        3. For each source, carefully extract and analyze:
           - Title
           - Authors
           - Publication year
           - Key findings from abstract
           - Methodology used
           - Outcomes reported
        4. Combine and synthesize the information:
           - Identify common themes
           - Note conflicting findings
           - Highlight strongest evidence
        5. Organize findings by:
           - Study type (RCT, cohort, etc.)
           - Sample size
           - Follow-up duration
           - Outcome measures

        If search results are limited:
        1. Try alternative search terms
        2. Broaden the search scope
        3. Include related conditions/treatments

        Output Format:
        1. Summary of search strategy
        2. Overview of findings
        3. Detailed analysis of key studies
        4. Evidence synthesis
        5. Knowledge gaps identified""",
        agent=agent,
        expected_output="A comprehensive research summary with detailed analysis of findings from both PMC and Google Scholar."
    )

def get_writing_task(agent, topic):
    return Task(
        description=f"""Write a comprehensive scientific paper about "{topic}" based on the research provided.

        Paper Structure:
        1. Introduction:
           - Current state of knowledge
           - Clinical significance
           - Research gaps
           - Clear objectives

        2. Materials and Methods:
           - Search strategy
           - Inclusion criteria
           - Data extraction process
           - Quality assessment

        3. Results (minimum one page):
           - Study characteristics
           - Patient demographics
           - Primary outcomes
           - Secondary outcomes
           - Safety data
           - Meta-analysis if applicable

        4. Discussion (minimum one page):
           - Summary of main findings
           - Comparison with existing literature
           - Clinical implications
           - Strengths and limitations
           - Future research directions

        5. Conclusion:
           - Key takeaways
           - Clinical recommendations
           - Research priorities

        6. References:
           - APA format
           - All cited studies included

        Writing Guidelines:
        - Use clear, scientific language
        - Support all claims with evidence
        - Include relevant statistics
        - Maintain objectivity
        - Address conflicting findings
        - Consider clinical relevance""",
        agent=agent,
        expected_output="A well-structured scientific paper with comprehensive analysis and proper citations."
    )
tools.py
ADDED
@@ -0,0 +1,158 @@
from langchain.tools import tool
from scholarly import scholarly, ProxyGenerator
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time

# Configure scholarly with a free proxy to reduce the chance of being blocked
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg)

@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    try:
        # Base URLs for the NCBI E-utilities
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        # Search parameters
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get article IDs
        response = requests.get(search_url, params=search_params)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fall back to XML parsing if the JSON response is malformed
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_tag.text for id_tag in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch article details
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                # Extract article data
                title_elem = article_soup.find("article-title")
                title = title_elem.text if title_elem else "No title"

                abstract_elem = article_soup.find("abstract")
                abstract = abstract_elem.text if abstract_elem else "No abstract"

                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(author_name)

                # In JATS XML the publication year sits inside <pub-date>
                year_elem = article_soup.find("pub-date")
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = journal_elem.text if journal_elem else "Unknown Journal"

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # Add a delay to respect NCBI rate limits
                time.sleep(0.5)

            except Exception:
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])

@tool
def google_scholar_search(query: str) -> str:
    """Search Google Scholar for articles"""
    try:
        # Refresh the free-proxy pool; scholarly.use_proxy() does not
        # reliably report success, so re-seed it explicitly
        if pg.FreeProxies():
            scholarly.use_proxy(pg)

        search_query = scholarly.search_pubs(query)
        results = []
        count = 0
        max_retries = 3

        while count < 20:
            try:
                result = next(search_query)

                # Extract publication data (scholarly 1.x yields dicts,
                # with authors as a list under bib['author'])
                bib = result.get('bib', {})
                authors = bib.get('author', [])
                pub = {
                    "title": bib.get('title', 'No title'),
                    "authors": authors if isinstance(authors, list) else str(authors).split(" and "),
                    "year": bib.get('pub_year', 'No year'),
                    "abstract": bib.get('abstract', 'No abstract'),
                    "journal": bib.get('venue', 'No venue'),
                    "citations": result.get('num_citations', 0)
                }

                # Skip if no title or abstract
                if pub["title"] == 'No title' or pub["abstract"] == 'No abstract':
                    continue

                results.append(pub)
                count += 1

                # Add a delay to avoid rate limiting
                time.sleep(0.5)

            except StopIteration:
                break
            except Exception:
                if max_retries > 0:
                    max_retries -= 1
                    time.sleep(1)
                    continue
                else:
                    break

        return json.dumps(results, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"Google Scholar search failed: {str(e)}"}])

@tool
def today_tool() -> str:
    """Get today's date"""
    return str(datetime.date.today())
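Since each function above is a langchain @tool, it can be exercised on its own before being handed to an agent; .run() is the usual single-input entry point on langchain tools. A quick check of the PMC tool with an illustrative query:

# Hypothetical standalone check, not part of this upload
from tools import pmc_search

print(pmc_search.run("metformin type 2 diabetes"))  # JSON string of up to 20 articles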