# IDAgentsFreshTest/tools/retrieve_guidelines.py
# IDAgents Developer
# Deploy COMPLETE ID Agents - Medical AI system (clean, no cache files)
# 8120936
# raw
# history blame
# 24 kB
"""
retrieve_guidelines.py
----------------------
Tool for retrieving clinical practice guidelines, with focus on IDSA (Infectious Diseases Society of America) guidelines.
This tool searches for and retrieves the most current clinical guidelines based on user queries about specific
infectious disease topics, conditions, or pathogens. It leverages internet search to find official IDSA
guidelines and extracts key recommendations, treatment algorithms, and clinical guidance.
Key Features:
- Searches official IDSA website and trusted medical sources
- Filters results by relevance to specific infectious disease topics
- Extracts key recommendations and treatment guidance
- Provides proper citations and publication dates
- Handles multiple guideline topics (pneumonia, UTI, sepsis, etc.)
"""
import asyncio
import re
from typing import Any, Dict, List, Union
from tools.base import Tool
from tools.utils import ToolExecutionError, logger
class RetrieveGuidelinesTool(Tool):
"""
Tool for retrieving clinical practice guidelines, with focus on IDSA guidelines.
This tool searches for current IDSA guidelines based on user queries about specific
infectious disease conditions, pathogens, or clinical scenarios.
"""
def __init__(self) -> None:
    """Set the tool's name, description and JSON argument schema."""
    super().__init__()

    # Schema fragment for the mandatory ``topic`` argument.
    topic_arg = {
        "type": "string",
        "description": "The infectious disease topic, condition, or pathogen to search for (e.g., 'pneumonia', 'UTI', 'sepsis', 'MRSA', 'C. difficile')",
    }
    # Schema fragment for the optional ``specific_focus`` argument.
    focus_arg = {
        "type": "string",
        "description": "Optional: Specific aspect of the topic (e.g., 'treatment', 'diagnosis', 'prophylaxis', 'pediatric')",
        "default": "",
    }

    self.name = "retrieve_guidelines"
    self.description = "Retrieve clinical practice guidelines for specific infectious disease topics, conditions, or pathogens, with focus on IDSA guidelines."
    self.args_schema = {
        "type": "object",
        "properties": {
            "topic": topic_arg,
            "specific_focus": focus_arg,
        },
        "required": ["topic"],
    }
def openai_spec(self, legacy=False):
    """Build the OpenAI function specification for this tool.

    The ``legacy`` flag is accepted but has no effect on the result.

    Returns:
        dict: ``name``, ``description`` and ``parameters`` taken from the
        instance attributes of the same purpose.
    """
    spec = dict(name=self.name, description=self.description)
    spec["parameters"] = self.args_schema
    return spec
async def run(
    self,
    topic: str,
    specific_focus: str = ""
) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """
    Retrieve the latest IDSA guidelines for the specified topic.

    The queries produced by ``_build_search_queries`` are run first; if they
    yield nothing, a small set of broader fallback queries is tried. The top
    three ranked hits are then processed and summarised.

    Args:
        topic (str): The infectious disease topic to search for
        specific_focus (str, optional): Specific aspect to focus on

    Returns:
        Union[List[Dict[str, Any]], Dict[str, Any]]: Guidelines information,
        or a dict with an ``error`` key when the topic is blank or nothing
        was found.

    Raises:
        ToolExecutionError: If anything outside the per-query best-effort
            handling fails; the original exception is chained as the cause.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from datetime import date

    # A blank topic cannot produce a meaningful search; skip the network calls.
    if not topic or not topic.strip():
        return {
            "error": "A non-empty topic is required to search for IDSA guidelines.",
            "topic": topic,
            "guidelines": []
        }

    try:
        # Import internet search tool
        from tools.internet_search import InternetSearchTool
        internet_tool = InternetSearchTool()

        # Primary pass: IDSA-specific queries for the topic
        search_queries = self._build_search_queries(topic, specific_focus)
        guidelines_data = await self._search_and_filter(internet_tool, search_queries, topic)

        # Remove duplicates and sort by relevance
        guidelines_data = self._deduplicate_and_rank(guidelines_data, topic)

        if not guidelines_data:
            # Fallback: try broader search for general treatment guidelines
            fallback_queries = [
                f"site:idsociety.org {topic} treatment",
                f"site:idsociety.org {topic} management",
                f"site:idsociety.org {topic} clinical",
                f"IDSA {topic} therapy"
            ]
            guidelines_data = await self._search_and_filter(internet_tool, fallback_queries, topic)
            guidelines_data = self._deduplicate_and_rank(guidelines_data, topic)

        if not guidelines_data:
            return {
                "error": f"No IDSA guidelines found for topic: {topic}",
                "suggestion": "Try searching for broader terms like 'infectious diseases', 'antimicrobial therapy', or specific pathogens. Note: IDSA may not have specific guidelines for all conditions.",
                "topic": topic,
                "guidelines": [],
                "note": "This search is limited to official IDSA guidelines only. For tuberculosis, IDSA may refer to CDC or WHO guidelines as the primary authorities."
            }

        # Extract key information from top results
        processed_guidelines = self._extract_guideline_info(guidelines_data[:3], topic)

        # Generate a summary that answers the user's question
        question_summary = self._generate_question_summary(processed_guidelines, topic, specific_focus)

        return {
            "topic": topic,
            "specific_focus": specific_focus,
            "guidelines_found": len(processed_guidelines),
            "question_summary": question_summary,
            "guidelines": processed_guidelines,
            # Date of this search (YYYY-MM-DD), not a fixed literal.
            "search_timestamp": date.today().isoformat(),
            "source": "IDSA (Infectious Diseases Society of America)"
        }
    except Exception as e:
        logger.error(f"RetrieveGuidelinesTool failed: {e}", exc_info=True)
        raise ToolExecutionError(f"Failed to retrieve guidelines: {e}") from e


async def _search_and_filter(
    self,
    internet_tool: Any,
    queries: List[str],
    topic: str
) -> List[Dict[str, Any]]:
    """Run each query through the search tool and collect the IDSA-relevant hits.

    Searching is best-effort: a query that fails is logged and skipped so one
    bad query does not abort the whole retrieval.
    """
    collected: List[Dict[str, Any]] = []
    for query in queries:
        try:
            search_results = await internet_tool.run(query)
            if isinstance(search_results, str):
                # Parse the string response into structured data
                search_results = self._parse_search_results(search_results)
            elif not isinstance(search_results, list):
                # Any other return type is ignored.
                continue
            collected.extend(self._filter_idsa_guidelines(search_results, topic))
        except Exception as e:
            logger.warning(f"Search failed for query '{query}': {e}")
    return collected
def _build_search_queries(self, topic: str, specific_focus: str) -> List[str]:
"""Build comprehensive search queries for IDSA guidelines."""
queries = []
# Map common terms to more specific medical terms
topic_mapping = {
'tuberculosis': ['tuberculosis', 'TB', 'mycobacterium tuberculosis', 'pulmonary tuberculosis'],
'pneumonia': ['pneumonia', 'community-acquired pneumonia', 'CAP', 'hospital-acquired pneumonia'],
'sepsis': ['sepsis', 'severe sepsis', 'septic shock', 'bloodstream infection'],
'meningitis': ['meningitis', 'bacterial meningitis', 'CNS infection'],
'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection'],
'uti': ['urinary tract infection', 'UTI', 'cystitis', 'pyelonephritis']
}
# Get all variations of the topic
topic_variations = topic_mapping.get(topic.lower(), [topic])
# Primary IDSA-specific queries
for variation in topic_variations:
queries.extend([
f"IDSA guidelines {variation}",
f"IDSA clinical practice guidelines {variation}",
f"Infectious Diseases Society of America {variation} guidelines",
f"IDSA {variation} treatment guidelines",
f"IDSA {variation} management recommendations",
f"site:idsociety.org {variation} guidelines"
])
# Add specific focus if provided
if specific_focus:
for variation in topic_variations:
queries.extend([
f"IDSA guidelines {variation} {specific_focus}",
f"IDSA {variation} {specific_focus} recommendations"
])
# Add broader searches for less common conditions
if topic.lower() in ['tuberculosis', 'tb']:
queries.extend([
"IDSA mycobacterial infections guidelines",
"IDSA tuberculosis screening guidelines",
"IDSA latent tuberculosis treatment",
"site:idsociety.org tuberculosis guidelines",
"site:idsociety.org TB guidelines",
"site:idsociety.org mycobacterium tuberculosis"
])
# Add year-specific searches for latest guidelines
current_year = 2025
for year in [current_year, current_year-1, current_year-2]:
queries.append(f"IDSA {topic} guidelines {year}")
return queries[:15] # Limit to 15 most relevant queries
def _parse_search_results(self, search_results_str: str) -> List[Dict]:
"""Parse the formatted search results string into structured data."""
results = []
# Split by entries (each entry starts with **)
entries = re.split(r'\*\*([^*]+)\*\*', search_results_str)
for i in range(1, len(entries), 2): # Skip first empty entry, then take every other
if i + 1 < len(entries):
title = entries[i].strip()
content_and_link = entries[i + 1].strip()
# Extract the link
link_match = re.search(r'\[Read more\]\(([^)]+)\)', content_and_link)
url = link_match.group(1) if link_match else ""
# Extract the content (everything before the link)
content = re.sub(r'\[Read more\]\([^)]+\)', '', content_and_link).strip()
if title and url:
results.append({
'title': title,
'url': url,
'content': content,
'snippet': content
})
return results
def _filter_idsa_guidelines(self, search_results: List[Dict], topic: str) -> List[Dict]:
"""Filter search results to focus ONLY on official IDSA guidelines."""
filtered_results = []
for result in search_results:
url = result.get('url', '').lower()
title = result.get('title', '').lower()
content = result.get('content', '').lower()
# Check if it's from official IDSA sources ONLY
is_official_idsa = any(domain in url for domain in [
'idsociety.org',
'idsa.org',
'academic.oup.com/cid' # Clinical Infectious Diseases journal (IDSA's official journal)
])
# Check if it contains IDSA-specific guideline indicators
is_idsa_guideline = any(indicator in title or indicator in content for indicator in [
'idsa', 'infectious diseases society of america', 'infectious diseases society',
'idsa guideline', 'idsa guidelines', 'idsa clinical practice'
])
# Enhanced topic relevance check
topic_keywords = self._get_topic_keywords(topic)
topic_relevant = any(keyword in title or keyword in content for keyword in topic_keywords)
# Only include if it's from official IDSA source AND contains guideline indicators AND is topic relevant
if topic_relevant and (is_official_idsa or is_idsa_guideline):
result['relevance_score'] = self._calculate_relevance_score(result, topic)
filtered_results.append(result)
return filtered_results
def _get_topic_keywords(self, topic: str) -> List[str]:
"""Get relevant keywords for topic matching."""
base_keywords = [topic.lower(), *topic.lower().split()]
# Add specific synonyms and related terms
keyword_mapping = {
'tuberculosis': ['tuberculosis', 'tb', 'mycobacterium', 'pulmonary tb', 'latent tb', 'active tb'],
'pneumonia': ['pneumonia', 'cap', 'hospital-acquired', 'ventilator-associated', 'lung infection'],
'sepsis': ['sepsis', 'septic shock', 'bloodstream infection', 'bacteremia'],
'meningitis': ['meningitis', 'cns infection', 'bacterial meningitis', 'brain infection'],
'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection', 'heart infection'],
'uti': ['urinary tract infection', 'uti', 'cystitis', 'pyelonephritis', 'bladder infection']
}
if topic.lower() in keyword_mapping:
base_keywords.extend(keyword_mapping[topic.lower()])
return base_keywords
def _calculate_relevance_score(self, result: Dict, topic: str) -> float:
"""Calculate relevance score for a search result."""
score = 0.0
url = result.get('url', '').lower()
title = result.get('title', '').lower()
content = result.get('content', '').lower()
# Official IDSA sources get highest scores
if 'idsociety.org' in url:
score += 20.0
elif 'idsa.org' in url:
score += 18.0
elif 'academic.oup.com/cid' in url:
score += 15.0
# IDSA-specific terms get high scores
idsa_terms = ['idsa', 'infectious diseases society of america', 'infectious diseases society']
for term in idsa_terms:
if term in title:
score += 10.0
elif term in content:
score += 5.0
# Guideline-specific terms
guideline_terms = ['guideline', 'guidelines', 'clinical practice', 'recommendations']
for term in guideline_terms:
if term in title:
score += 8.0
elif term in content:
score += 4.0
# Topic relevance
topic_keywords = self._get_topic_keywords(topic)
for keyword in topic_keywords:
if keyword in title:
score += 6.0
elif keyword in content:
score += 2.0
# Recency indicators
recent_years = ['2025', '2024', '2023', '2022', '2021']
for year in recent_years:
if year in title or year in content:
score += 2.0
break
return score
def _deduplicate_and_rank(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
"""Remove duplicates and rank guidelines by relevance."""
# Remove duplicates based on URL
seen_urls = set()
unique_guidelines = []
for guideline in guidelines_data:
url = guideline.get('url', '')
if url not in seen_urls:
seen_urls.add(url)
unique_guidelines.append(guideline)
# Sort by relevance score
unique_guidelines.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
return unique_guidelines
def _extract_guideline_info(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
"""Extract key information from guideline search results."""
processed_guidelines = []
for guideline in guidelines_data:
try:
# Extract key information
title = guideline.get('title', '')
url = guideline.get('url', '')
content = guideline.get('content', '')
# Extract publication year
pub_year = self._extract_publication_year(title, content)
# Extract key recommendations
recommendations = self._extract_recommendations(content)
# Extract authors/organization
authors = self._extract_authors(content)
processed_guideline = {
'title': title,
'url': url,
'publication_year': pub_year,
'authors': authors,
'key_recommendations': recommendations,
'relevance_score': guideline.get('relevance_score', 0),
'summary': self._generate_summary(content, topic)
}
processed_guidelines.append(processed_guideline)
except Exception as e:
logger.warning(f"Failed to process guideline: {e}")
continue
return processed_guidelines
def _extract_publication_year(self, title: str, content: str) -> str:
"""Extract publication year from title or content."""
# Look for years in title first
year_pattern = r'\b(20\d{2})\b'
for text in [title, content]:
matches = re.findall(year_pattern, text)
if matches:
# Return the most recent year found
return max(matches)
return "Unknown"
def _extract_recommendations(self, content: str) -> List[str]:
"""Extract key recommendations from guideline content."""
recommendations = []
# Look for common recommendation patterns
recommendation_patterns = [
r'recommend[s]?\s+([^.]+)',
r'should\s+([^.]+)',
r'we\s+recommend\s+([^.]+)',
r'grade\s+[AB]\s+recommendation[:\s]+([^.]+)',
r'strong\s+recommendation[:\s]+([^.]+)'
]
for pattern in recommendation_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
recommendations.extend(matches[:3]) # Limit to top 3 per pattern
# Clean up recommendations
cleaned_recommendations = []
for rec in recommendations:
cleaned = rec.strip()
if len(cleaned) > 20 and len(cleaned) < 200: # Reasonable length
cleaned_recommendations.append(cleaned)
return cleaned_recommendations[:5] # Return top 5 recommendations
def _extract_authors(self, content: str) -> str:
"""Extract authors or organization from content."""
# Look for IDSA or author patterns
author_patterns = [
r'infectious\s+diseases\s+society\s+of\s+america',
r'idsa',
r'authored?\s+by\s+([^.]+)',
r'committee\s+([^.]+)'
]
for pattern in author_patterns:
match = re.search(pattern, content, re.IGNORECASE)
if match:
if 'idsa' in pattern or 'infectious' in pattern:
return "Infectious Diseases Society of America (IDSA)"
else:
return match.group(1).strip()
return "IDSA"
def _generate_summary(self, content: str, topic: str) -> str:
"""Generate a brief summary of the guideline."""
# Extract first few sentences that mention the topic
sentences = content.split('.')
relevant_sentences = []
for sentence in sentences[:10]: # Check first 10 sentences
if topic.lower() in sentence.lower():
relevant_sentences.append(sentence.strip())
if len(relevant_sentences) >= 2:
break
if relevant_sentences:
return '. '.join(relevant_sentences) + '.'
else:
# Return first sentence if no topic-specific content found
return sentences[0].strip() + '.' if sentences else "IDSA clinical practice guideline."
def _generate_question_summary(self, guidelines: List[Dict], topic: str, specific_focus: str) -> str:
"""Generate a concise summary that answers the user's question based on the guidelines found."""
if not guidelines:
return f"No IDSA guidelines found specifically addressing {topic}."
# Build the summary based on the specific focus or general topic
if specific_focus:
question_context = f"{topic} {specific_focus}"
else:
question_context = topic
# Extract key information from the guidelines
key_points = []
recommendations = []
for guideline in guidelines:
# Get key recommendations
guideline_recs = guideline.get('key_recommendations', [])
recommendations.extend(guideline_recs[:2]) # Take top 2 from each guideline
# Extract key points from summary
summary = guideline.get('summary', '')
if summary and len(summary) > 20:
key_points.append(summary)
# Build the summary
summary_parts = []
# Start with context
summary_parts.append(f"Based on IDSA guidelines for {question_context}:")
# Add key recommendations if available
if recommendations:
summary_parts.append("\n**Key Recommendations:**")
for i, rec in enumerate(recommendations[:3], 1): # Limit to top 3
summary_parts.append(f"{i}. {rec.strip()}")
# Add general guidance from guidelines
if key_points:
summary_parts.append(f"\n**Clinical Guidance:**")
# Combine and summarize key points
combined_guidance = ' '.join(key_points[:2]) # Use first 2 summaries
# Extract most relevant sentences
sentences = combined_guidance.split('.')
relevant_sentences = [s.strip() for s in sentences if len(s.strip()) > 30][:2]
for sentence in relevant_sentences:
if sentence:
summary_parts.append(f"• {sentence}.")
# Add specific guidance based on common scenarios
if topic.lower() in ['tuberculosis', 'tb']:
if 'quantiferon' in (specific_focus or '').lower() or 'igra' in (specific_focus or '').lower():
summary_parts.append(f"\n**For undetermined IGRA/QuantiFERON results:** Consider clinical risk factors, repeat testing, or alternative diagnostic approaches as outlined in the guidelines.")
# Combine all parts
full_summary = '\n'.join(summary_parts)
# Ensure summary is not too long
if len(full_summary) > 500:
# Truncate and add ellipsis
full_summary = full_summary[:497] + "..."
return full_summary