Spaces:

John-jero
/

IDAgentsFreshTest

Sleeping

IDAgentsFreshTest / tools /retrieve_guidelines.py

IDAgents Developer

Deploy COMPLETE ID Agents - Medical AI system (clean, no cache files)

8120936 3 months ago

24 kB

	"""
	retrieve_guidelines.py
	----------------------

	Tool for retrieving clinical practice guidelines, with focus on IDSA (Infectious Diseases Society of America) guidelines.

	This tool searches for and retrieves the most current clinical guidelines based on user queries about specific
	infectious disease topics, conditions, or pathogens. It leverages internet search to find official IDSA
	guidelines and extracts key recommendations, treatment algorithms, and clinical guidance.

	Key Features:
	- Searches official IDSA website and trusted medical sources
	- Filters results by relevance to specific infectious disease topics
	- Extracts key recommendations and treatment guidance
	- Provides proper citations and publication dates
	- Handles multiple guideline topics (pneumonia, UTI, sepsis, etc.)
	"""

	import asyncio
	import re
	from typing import Any, Dict, List, Union
	from tools.base import Tool
	from tools.utils import ToolExecutionError, logger

	class RetrieveGuidelinesTool(Tool):
	"""
	Tool for retrieving clinical practice guidelines, with focus on IDSA guidelines.

	This tool searches for current IDSA guidelines based on user queries about specific
	infectious disease conditions, pathogens, or clinical scenarios.
	"""

	def __init__(self) -> None:
	    """Register the tool's name, description and argument schema.

	    Runs the base-class initialiser first, then sets the three attributes
	    that ``openai_spec`` reads: ``name``, ``description`` and
	    ``args_schema``.
	    """
	    super().__init__()

	    # JSON-schema fragment for each accepted argument.
	    topic_property = {
	        "type": "string",
	        "description": "The infectious disease topic, condition, or pathogen to search for (e.g., 'pneumonia', 'UTI', 'sepsis', 'MRSA', 'C. difficile')"
	    }
	    focus_property = {
	        "type": "string",
	        "description": "Optional: Specific aspect of the topic (e.g., 'treatment', 'diagnosis', 'prophylaxis', 'pediatric')",
	        "default": ""
	    }

	    self.name = "retrieve_guidelines"
	    self.description = "Retrieve clinical practice guidelines for specific infectious disease topics, conditions, or pathogens, with focus on IDSA guidelines."
	    self.args_schema = {
	        "type": "object",
	        "properties": {
	            "topic": topic_property,
	            "specific_focus": focus_property
	        },
	        # Only the topic is mandatory; the focus falls back to "".
	        "required": ["topic"]
	    }

	def openai_spec(self, legacy=False):
	"""Return OpenAI function specification."""
	return {
	"name": self.name,
	"description": self.description,
	"parameters": self.args_schema
	}

	async def run(
	    self,
	    topic: str,
	    specific_focus: str = ""
	) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
	    """
	    Retrieve the latest IDSA guidelines for the specified topic.

	    Runs the queries from ``_build_search_queries`` through the internet
	    search tool and keeps the hits accepted by ``_filter_idsa_guidelines``.
	    If nothing survives, four broader fallback queries are tried. The top
	    three ranked hits are then summarised.

	    Args:
	        topic (str): The infectious disease topic to search for
	        specific_focus (str, optional): Specific aspect to focus on

	    Returns:
	        Dict[str, Any]: On success, a dict with ``topic``,
	        ``specific_focus``, ``guidelines_found``, ``question_summary``,
	        ``guidelines``, ``search_timestamp`` (today's date, ISO format)
	        and ``source``. When no guideline is found, a dict with ``error``,
	        ``suggestion``, ``topic``, an empty ``guidelines`` list and
	        ``note``.

	    Raises:
	        ToolExecutionError: If anything outside the per-query handling
	            fails; the original exception is chained as the cause.
	    """
	    # Function-scope import: the timestamp used to be a hard-coded string.
	    from datetime import date

	    try:
	        # Import internet search tool
	        from tools.internet_search import InternetSearchTool
	        internet_tool = InternetSearchTool()

	        # Primary pass: IDSA-specific queries for the topic.
	        search_queries = self._build_search_queries(topic, specific_focus)
	        guidelines_data = await self._search_and_filter(internet_tool, search_queries, topic)

	        # Remove duplicates and sort by relevance
	        guidelines_data = self._deduplicate_and_rank(guidelines_data, topic)

	        if not guidelines_data:
	            # Fallback: try broader search for general treatment guidelines
	            fallback_queries = [
	                f"site:idsociety.org {topic} treatment",
	                f"site:idsociety.org {topic} management",
	                f"site:idsociety.org {topic} clinical",
	                f"IDSA {topic} therapy"
	            ]
	            guidelines_data = await self._search_and_filter(internet_tool, fallback_queries, topic)
	            guidelines_data = self._deduplicate_and_rank(guidelines_data, topic)

	        if not guidelines_data:
	            return {
	                "error": f"No IDSA guidelines found for topic: {topic}",
	                "suggestion": "Try searching for broader terms like 'infectious diseases', 'antimicrobial therapy', or specific pathogens. Note: IDSA may not have specific guidelines for all conditions.",
	                "topic": topic,
	                "guidelines": [],
	                "note": "This search is limited to official IDSA guidelines only. For tuberculosis, IDSA may refer to CDC or WHO guidelines as the primary authorities."
	            }

	        # Extract key information from top results
	        processed_guidelines = self._extract_guideline_info(guidelines_data[:3], topic)

	        # Generate a summary that answers the user's question
	        question_summary = self._generate_question_summary(processed_guidelines, topic, specific_focus)

	        return {
	            "topic": topic,
	            "specific_focus": specific_focus,
	            "guidelines_found": len(processed_guidelines),
	            "question_summary": question_summary,
	            "guidelines": processed_guidelines,
	            # Date the search actually ran, not a fixed string.
	            "search_timestamp": date.today().isoformat(),
	            "source": "IDSA (Infectious Diseases Society of America)"
	        }

	    except Exception as e:
	        logger.error(f"RetrieveGuidelinesTool failed: {e}", exc_info=True)
	        # Chain the cause so the original traceback is preserved.
	        raise ToolExecutionError(f"Failed to retrieve guidelines: {e}") from e

	async def _search_and_filter(self, internet_tool: Any, queries: List[str], topic: str) -> List[Dict[str, Any]]:
	    """Run each query and collect the hits that pass the IDSA filter.

	    A query that fails (search, parsing or filtering) is logged and
	    skipped so that one bad query does not abort the whole retrieval.
	    """
	    collected: List[Dict[str, Any]] = []

	    for query in queries:
	        try:
	            search_results = await internet_tool.run(query)

	            if isinstance(search_results, str):
	                # Formatted text response: parse it into dicts first.
	                search_results = self._parse_search_results(search_results)
	            elif not isinstance(search_results, list):
	                # Any other response type is ignored.
	                continue

	            collected.extend(self._filter_idsa_guidelines(search_results, topic))

	        except Exception as e:
	            logger.warning(f"Search failed for query '{query}': {e}")
	            continue

	    return collected

	def _build_search_queries(self, topic: str, specific_focus: str) -> List[str]:
	"""Build comprehensive search queries for IDSA guidelines."""
	queries = []

	# Map common terms to more specific medical terms
	topic_mapping = {
	'tuberculosis': ['tuberculosis', 'TB', 'mycobacterium tuberculosis', 'pulmonary tuberculosis'],
	'pneumonia': ['pneumonia', 'community-acquired pneumonia', 'CAP', 'hospital-acquired pneumonia'],
	'sepsis': ['sepsis', 'severe sepsis', 'septic shock', 'bloodstream infection'],
	'meningitis': ['meningitis', 'bacterial meningitis', 'CNS infection'],
	'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection'],
	'uti': ['urinary tract infection', 'UTI', 'cystitis', 'pyelonephritis']
	}

	# Get all variations of the topic
	topic_variations = topic_mapping.get(topic.lower(), [topic])

	# Primary IDSA-specific queries
	for variation in topic_variations:
	queries.extend([
	f"IDSA guidelines {variation}",
	f"IDSA clinical practice guidelines {variation}",
	f"Infectious Diseases Society of America {variation} guidelines",
	f"IDSA {variation} treatment guidelines",
	f"IDSA {variation} management recommendations",
	f"site:idsociety.org {variation} guidelines"
	])

	# Add specific focus if provided
	if specific_focus:
	for variation in topic_variations:
	queries.extend([
	f"IDSA guidelines {variation} {specific_focus}",
	f"IDSA {variation} {specific_focus} recommendations"
	])

	# Add broader searches for less common conditions
	if topic.lower() in ['tuberculosis', 'tb']:
	queries.extend([
	"IDSA mycobacterial infections guidelines",
	"IDSA tuberculosis screening guidelines",
	"IDSA latent tuberculosis treatment",
	"site:idsociety.org tuberculosis guidelines",
	"site:idsociety.org TB guidelines",
	"site:idsociety.org mycobacterium tuberculosis"
	])

	# Add year-specific searches for latest guidelines
	current_year = 2025
	for year in [current_year, current_year-1, current_year-2]:
	queries.append(f"IDSA {topic} guidelines {year}")

	return queries[:15] # Limit to 15 most relevant queries

	def _parse_search_results(self, search_results_str: str) -> List[Dict]:
	"""Parse the formatted search results string into structured data."""
	results = []

	# Split by entries (each entry starts with **)
	entries = re.split(r'\\([^]+)\\*', search_results_str)

	for i in range(1, len(entries), 2): # Skip first empty entry, then take every other
	if i + 1 < len(entries):
	title = entries[i].strip()
	content_and_link = entries[i + 1].strip()

	# Extract the link
	link_match = re.search(r'\[Read more\]\(([^)]+)\)', content_and_link)
	url = link_match.group(1) if link_match else ""

	# Extract the content (everything before the link)
	content = re.sub(r'\[Read more\]\([^)]+\)', '', content_and_link).strip()

	if title and url:
	results.append({
	'title': title,
	'url': url,
	'content': content,
	'snippet': content
	})

	return results

	def _filter_idsa_guidelines(self, search_results: List[Dict], topic: str) -> List[Dict]:
	"""Filter search results to focus ONLY on official IDSA guidelines."""
	filtered_results = []

	for result in search_results:
	url = result.get('url', '').lower()
	title = result.get('title', '').lower()
	content = result.get('content', '').lower()

	# Check if it's from official IDSA sources ONLY
	is_official_idsa = any(domain in url for domain in [
	'idsociety.org',
	'idsa.org',
	'academic.oup.com/cid' # Clinical Infectious Diseases journal (IDSA's official journal)
	])

	# Check if it contains IDSA-specific guideline indicators
	is_idsa_guideline = any(indicator in title or indicator in content for indicator in [
	'idsa', 'infectious diseases society of america', 'infectious diseases society',
	'idsa guideline', 'idsa guidelines', 'idsa clinical practice'
	])

	# Enhanced topic relevance check
	topic_keywords = self._get_topic_keywords(topic)
	topic_relevant = any(keyword in title or keyword in content for keyword in topic_keywords)

	# Only include if it's from official IDSA source AND contains guideline indicators AND is topic relevant
	if topic_relevant and (is_official_idsa or is_idsa_guideline):
	result['relevance_score'] = self._calculate_relevance_score(result, topic)
	filtered_results.append(result)

	return filtered_results

	def _get_topic_keywords(self, topic: str) -> List[str]:
	"""Get relevant keywords for topic matching."""
	base_keywords = [topic.lower(), *topic.lower().split()]

	# Add specific synonyms and related terms
	keyword_mapping = {
	'tuberculosis': ['tuberculosis', 'tb', 'mycobacterium', 'pulmonary tb', 'latent tb', 'active tb'],
	'pneumonia': ['pneumonia', 'cap', 'hospital-acquired', 'ventilator-associated', 'lung infection'],
	'sepsis': ['sepsis', 'septic shock', 'bloodstream infection', 'bacteremia'],
	'meningitis': ['meningitis', 'cns infection', 'bacterial meningitis', 'brain infection'],
	'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection', 'heart infection'],
	'uti': ['urinary tract infection', 'uti', 'cystitis', 'pyelonephritis', 'bladder infection']
	}

	if topic.lower() in keyword_mapping:
	base_keywords.extend(keyword_mapping[topic.lower()])

	return base_keywords

	def _calculate_relevance_score(self, result: Dict, topic: str) -> float:
	"""Calculate relevance score for a search result."""
	score = 0.0

	url = result.get('url', '').lower()
	title = result.get('title', '').lower()
	content = result.get('content', '').lower()

	# Official IDSA sources get highest scores
	if 'idsociety.org' in url:
	score += 20.0
	elif 'idsa.org' in url:
	score += 18.0
	elif 'academic.oup.com/cid' in url:
	score += 15.0

	# IDSA-specific terms get high scores
	idsa_terms = ['idsa', 'infectious diseases society of america', 'infectious diseases society']
	for term in idsa_terms:
	if term in title:
	score += 10.0
	elif term in content:
	score += 5.0

	# Guideline-specific terms
	guideline_terms = ['guideline', 'guidelines', 'clinical practice', 'recommendations']
	for term in guideline_terms:
	if term in title:
	score += 8.0
	elif term in content:
	score += 4.0

	# Topic relevance
	topic_keywords = self._get_topic_keywords(topic)
	for keyword in topic_keywords:
	if keyword in title:
	score += 6.0
	elif keyword in content:
	score += 2.0

	# Recency indicators
	recent_years = ['2025', '2024', '2023', '2022', '2021']
	for year in recent_years:
	if year in title or year in content:
	score += 2.0
	break

	return score

	def _deduplicate_and_rank(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
	"""Remove duplicates and rank guidelines by relevance."""
	# Remove duplicates based on URL
	seen_urls = set()
	unique_guidelines = []

	for guideline in guidelines_data:
	url = guideline.get('url', '')
	if url not in seen_urls:
	seen_urls.add(url)
	unique_guidelines.append(guideline)

	# Sort by relevance score
	unique_guidelines.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)

	return unique_guidelines

	def _extract_guideline_info(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
	"""Extract key information from guideline search results."""
	processed_guidelines = []

	for guideline in guidelines_data:
	try:
	# Extract key information
	title = guideline.get('title', '')
	url = guideline.get('url', '')
	content = guideline.get('content', '')

	# Extract publication year
	pub_year = self._extract_publication_year(title, content)

	# Extract key recommendations
	recommendations = self._extract_recommendations(content)

	# Extract authors/organization
	authors = self._extract_authors(content)

	processed_guideline = {
	'title': title,
	'url': url,
	'publication_year': pub_year,
	'authors': authors,
	'key_recommendations': recommendations,
	'relevance_score': guideline.get('relevance_score', 0),
	'summary': self._generate_summary(content, topic)
	}

	processed_guidelines.append(processed_guideline)

	except Exception as e:
	logger.warning(f"Failed to process guideline: {e}")
	continue

	return processed_guidelines

	def _extract_publication_year(self, title: str, content: str) -> str:
	"""Extract publication year from title or content."""
	# Look for years in title first
	year_pattern = r'\b(20\d{2})\b'

	for text in [title, content]:
	matches = re.findall(year_pattern, text)
	if matches:
	# Return the most recent year found
	return max(matches)

	return "Unknown"

	def _extract_recommendations(self, content: str) -> List[str]:
	"""Extract key recommendations from guideline content."""
	recommendations = []

	# Look for common recommendation patterns
	recommendation_patterns = [
	r'recommend[s]?\s+([^.]+)',
	r'should\s+([^.]+)',
	r'we\s+recommend\s+([^.]+)',
	r'grade\s+[AB]\s+recommendation[:\s]+([^.]+)',
	r'strong\s+recommendation[:\s]+([^.]+)'
	]

	for pattern in recommendation_patterns:
	matches = re.findall(pattern, content, re.IGNORECASE)
	recommendations.extend(matches[:3]) # Limit to top 3 per pattern

	# Clean up recommendations
	cleaned_recommendations = []
	for rec in recommendations:
	cleaned = rec.strip()
	if len(cleaned) > 20 and len(cleaned) < 200: # Reasonable length
	cleaned_recommendations.append(cleaned)

	return cleaned_recommendations[:5] # Return top 5 recommendations

	def _extract_authors(self, content: str) -> str:
	"""Extract authors or organization from content."""
	# Look for IDSA or author patterns
	author_patterns = [
	r'infectious\s+diseases\s+society\s+of\s+america',
	r'idsa',
	r'authored?\s+by\s+([^.]+)',
	r'committee\s+([^.]+)'
	]

	for pattern in author_patterns:
	match = re.search(pattern, content, re.IGNORECASE)
	if match:
	if 'idsa' in pattern or 'infectious' in pattern:
	return "Infectious Diseases Society of America (IDSA)"
	else:
	return match.group(1).strip()

	return "IDSA"

	def _generate_summary(self, content: str, topic: str) -> str:
	"""Generate a brief summary of the guideline."""
	# Extract first few sentences that mention the topic
	sentences = content.split('.')
	relevant_sentences = []

	for sentence in sentences[:10]: # Check first 10 sentences
	if topic.lower() in sentence.lower():
	relevant_sentences.append(sentence.strip())
	if len(relevant_sentences) >= 2:
	break

	if relevant_sentences:
	return '. '.join(relevant_sentences) + '.'
	else:
	# Return first sentence if no topic-specific content found
	return sentences[0].strip() + '.' if sentences else "IDSA clinical practice guideline."

	def _generate_question_summary(self, guidelines: List[Dict], topic: str, specific_focus: str) -> str:
	"""Generate a concise summary that answers the user's question based on the guidelines found."""
	if not guidelines:
	return f"No IDSA guidelines found specifically addressing {topic}."

	# Build the summary based on the specific focus or general topic
	if specific_focus:
	question_context = f"{topic} {specific_focus}"
	else:
	question_context = topic

	# Extract key information from the guidelines
	key_points = []
	recommendations = []

	for guideline in guidelines:
	# Get key recommendations
	guideline_recs = guideline.get('key_recommendations', [])
	recommendations.extend(guideline_recs[:2]) # Take top 2 from each guideline

	# Extract key points from summary
	summary = guideline.get('summary', '')
	if summary and len(summary) > 20:
	key_points.append(summary)

	# Build the summary
	summary_parts = []

	# Start with context
	summary_parts.append(f"Based on IDSA guidelines for {question_context}:")

	# Add key recommendations if available
	if recommendations:
	summary_parts.append("\nKey Recommendations:")
	for i, rec in enumerate(recommendations[:3], 1): # Limit to top 3
	summary_parts.append(f"{i}. {rec.strip()}")

	# Add general guidance from guidelines
	if key_points:
	summary_parts.append(f"\nClinical Guidance:")
	# Combine and summarize key points
	combined_guidance = ' '.join(key_points[:2]) # Use first 2 summaries
	# Extract most relevant sentences
	sentences = combined_guidance.split('.')
	relevant_sentences = [s.strip() for s in sentences if len(s.strip()) > 30][:2]
	for sentence in relevant_sentences:
	if sentence:
	summary_parts.append(f"• {sentence}.")

	# Add specific guidance based on common scenarios
	if topic.lower() in ['tuberculosis', 'tb']:
	if 'quantiferon' in (specific_focus or '').lower() or 'igra' in (specific_focus or '').lower():
	summary_parts.append(f"\nFor undetermined IGRA/QuantiFERON results: Consider clinical risk factors, repeat testing, or alternative diagnostic approaches as outlined in the guidelines.")

	# Combine all parts
	full_summary = '\n'.join(summary_parts)

	# Ensure summary is not too long
	if len(full_summary) > 500:
	# Truncate and add ellipsis
	full_summary = full_summary[:497] + "..."

	return full_summary