Spaces:
Sleeping
Sleeping
"""
retrieve_guidelines.py
----------------------
Tool for retrieving clinical practice guidelines, with a focus on IDSA (Infectious Diseases Society of America) guidelines.

This tool searches for and retrieves the most current clinical guidelines based on user queries about specific
infectious disease topics, conditions, or pathogens. It leverages internet search to find official IDSA
guidelines and extracts key recommendations, treatment algorithms, and clinical guidance.

Key Features:
- Searches the official IDSA website and trusted medical sources
- Filters results by relevance to specific infectious disease topics
- Extracts key recommendations and treatment guidance
- Provides proper citations and publication dates
- Handles multiple guideline topics (pneumonia, UTI, sepsis, etc.)
"""
import asyncio
import re
from datetime import date
from typing import Any, Dict, List, Union

from tools.base import Tool
from tools.utils import ToolExecutionError, logger
class RetrieveGuidelinesTool(Tool):
    """
    Guideline-retrieval tool centred on IDSA clinical practice guidelines.

    Given a user query about an infectious disease condition, pathogen, or
    clinical scenario, it searches for current IDSA guidelines on that topic.
    """

    def __init__(self) -> None:
        """Set the tool's name, description, and JSON argument schema."""
        super().__init__()

        # Required argument: what to look up.
        topic_property = {
            "type": "string",
            "description": "The infectious disease topic, condition, or pathogen to search for (e.g., 'pneumonia', 'UTI', 'sepsis', 'MRSA', 'C. difficile')",
        }
        # Optional argument: narrows the lookup; defaults to an empty string.
        focus_property = {
            "type": "string",
            "description": "Optional: Specific aspect of the topic (e.g., 'treatment', 'diagnosis', 'prophylaxis', 'pediatric')",
            "default": "",
        }

        self.name = "retrieve_guidelines"
        self.description = "Retrieve clinical practice guidelines for specific infectious disease topics, conditions, or pathogens, with focus on IDSA guidelines."
        self.args_schema = {
            "type": "object",
            "properties": {
                "topic": topic_property,
                "specific_focus": focus_property,
            },
            "required": ["topic"],
        }
def openai_spec(self, legacy=False):
    """
    Build the OpenAI function specification for this tool.

    Args:
        legacy: Accepted for interface compatibility; not used here.

    Returns:
        dict: ``name``, ``description`` and ``parameters`` (the argument
        schema stored on the instance).
    """
    spec = {}
    spec["name"] = self.name
    spec["description"] = self.description
    spec["parameters"] = self.args_schema
    return spec
async def run(
    self,
    topic: str,
    specific_focus: str = ""
) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """
    Retrieve IDSA guideline search results for the specified topic.

    Runs the primary search queries; when none of them yields a relevant
    result, retries with a small set of broader fallback queries. The three
    highest-ranked results are then processed and summarised.

    Args:
        topic (str): The infectious disease topic to search for
        specific_focus (str, optional): Specific aspect to focus on

    Returns:
        Union[List[Dict[str, Any]], Dict[str, Any]]: A dict holding the
        processed guidelines and a question summary, or an error dict
        (``error`` / ``suggestion`` / empty ``guidelines``) when the topic is
        blank or nothing relevant was found.

    Raises:
        ToolExecutionError: If anything outside the per-query handling fails;
            the original exception is attached as the cause.
    """
    topic = (topic or "").strip()
    specific_focus = (specific_focus or "").strip()
    if not topic:
        # A blank topic cannot produce a meaningful query; fail fast using the
        # same error-dict shape as the "nothing found" case below.
        return {
            "error": "No topic provided for guideline search",
            "suggestion": "Provide an infectious disease topic, condition, or pathogen (e.g., 'pneumonia', 'UTI', 'sepsis').",
            "topic": topic,
            "guidelines": []
        }

    async def collect(search_tool, queries: List[str]) -> List[Dict]:
        """Run each query in turn and gather the results that pass the IDSA filter."""
        gathered: List[Dict] = []
        for query in queries:
            try:
                search_results = await search_tool.run(query)
                # The search tool may answer with a formatted string or a list.
                if isinstance(search_results, str):
                    search_results = self._parse_search_results(search_results)
                if isinstance(search_results, list):
                    gathered.extend(self._filter_idsa_guidelines(search_results, topic))
            except Exception as e:
                # Best effort: one failed query must not abort the others,
                # but the failure is always logged.
                logger.warning(f"Search failed for query '{query}': {e}")
        return gathered

    try:
        # Imported inside the method rather than at module level.
        # NOTE(review): presumably to avoid an import cycle - confirm.
        from tools.internet_search import InternetSearchTool
        internet_tool = InternetSearchTool()

        search_queries = self._build_search_queries(topic, specific_focus)
        guidelines_data = self._deduplicate_and_rank(
            await collect(internet_tool, search_queries), topic
        )

        if not guidelines_data:
            # Fallback: try broader search for general treatment guidelines
            fallback_queries = [
                f"site:idsociety.org {topic} treatment",
                f"site:idsociety.org {topic} management",
                f"site:idsociety.org {topic} clinical",
                f"IDSA {topic} therapy"
            ]
            guidelines_data = self._deduplicate_and_rank(
                await collect(internet_tool, fallback_queries), topic
            )

        if not guidelines_data:
            return {
                "error": f"No IDSA guidelines found for topic: {topic}",
                "suggestion": "Try searching for broader terms like 'infectious diseases', 'antimicrobial therapy', or specific pathogens. Note: IDSA may not have specific guidelines for all conditions.",
                "topic": topic,
                "guidelines": [],
                "note": "This search is limited to official IDSA guidelines only. For tuberculosis, IDSA may refer to CDC or WHO guidelines as the primary authorities."
            }

        # Only the three highest-ranked results are processed further.
        processed_guidelines = self._extract_guideline_info(guidelines_data[:3], topic)
        question_summary = self._generate_question_summary(processed_guidelines, topic, specific_focus)

        return {
            "topic": topic,
            "specific_focus": specific_focus,
            "guidelines_found": len(processed_guidelines),
            "question_summary": question_summary,
            "guidelines": processed_guidelines,
            # Date of this search (YYYY-MM-DD) rather than a fixed literal.
            "search_timestamp": date.today().isoformat(),
            "source": "IDSA (Infectious Diseases Society of America)"
        }
    except Exception as e:
        logger.error(f"RetrieveGuidelinesTool failed: {e}", exc_info=True)
        raise ToolExecutionError(f"Failed to retrieve guidelines: {e}") from e
def _build_search_queries(self, topic: str, specific_focus: str, max_queries: int = 15) -> List[str]:
    """
    Build the list of search queries used to look for IDSA guidelines.

    Four groups of queries are produced: general IDSA templates for every
    known variation of the topic, focus-specific queries (when
    ``specific_focus`` is given), extra tuberculosis queries, and
    year-specific queries for the current and two previous years.

    When everything fits within ``max_queries`` the de-duplicated queries are
    returned in that build order. When the list must be cut, the groups are
    interleaved first so that each group keeps some queries; a plain head
    slice would drop the focus and year queries for every topic that has
    several variations.

    Args:
        topic: The infectious disease topic.
        specific_focus: Optional aspect of the topic; blank means none.
        max_queries: Upper bound on the number of queries returned.

    Returns:
        List[str]: At most ``max_queries`` unique query strings.
    """
    # Map common terms to more specific medical terms
    topic_mapping = {
        'tuberculosis': ['tuberculosis', 'TB', 'mycobacterium tuberculosis', 'pulmonary tuberculosis'],
        'pneumonia': ['pneumonia', 'community-acquired pneumonia', 'CAP', 'hospital-acquired pneumonia'],
        'sepsis': ['sepsis', 'severe sepsis', 'septic shock', 'bloodstream infection'],
        'meningitis': ['meningitis', 'bacterial meningitis', 'CNS infection'],
        'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection'],
        'uti': ['urinary tract infection', 'UTI', 'cystitis', 'pyelonephritis']
    }
    # 'tb' is treated as tuberculosis so it receives the same variations.
    topic_aliases = {'tb': 'tuberculosis'}

    topic = (topic or '').strip()
    specific_focus = (specific_focus or '').strip()
    canonical = topic_aliases.get(topic.lower(), topic.lower())
    topic_variations = topic_mapping.get(canonical, [topic])

    # Primary IDSA-specific queries
    primary: List[str] = []
    for variation in topic_variations:
        primary.extend([
            f"IDSA guidelines {variation}",
            f"IDSA clinical practice guidelines {variation}",
            f"Infectious Diseases Society of America {variation} guidelines",
            f"IDSA {variation} treatment guidelines",
            f"IDSA {variation} management recommendations",
            f"site:idsociety.org {variation} guidelines"
        ])

    # Queries narrowed to the requested focus
    focused: List[str] = []
    if specific_focus:
        for variation in topic_variations:
            focused.extend([
                f"IDSA guidelines {variation} {specific_focus}",
                f"IDSA {variation} {specific_focus} recommendations"
            ])

    # Broader searches for tuberculosis
    extras: List[str] = []
    if canonical == 'tuberculosis':
        extras.extend([
            "IDSA mycobacterial infections guidelines",
            "IDSA tuberculosis screening guidelines",
            "IDSA latent tuberculosis treatment",
            "site:idsociety.org tuberculosis guidelines",
            "site:idsociety.org TB guidelines",
            "site:idsociety.org mycobacterium tuberculosis"
        ])

    # Year-specific searches, relative to today's date
    current_year = date.today().year
    yearly = [
        f"IDSA {topic} guidelines {year}"
        for year in (current_year, current_year - 1, current_year - 2)
    ]

    limit = max(0, max_queries)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    in_build_order = list(dict.fromkeys(primary + focused + extras + yearly))
    if len(in_build_order) <= limit:
        return in_build_order

    # Too many queries: take one from each group per round so that no group
    # is starved by the cut.
    groups = [focused, primary, extras, yearly]
    interleaved: List[str] = []
    for rank in range(max(len(group) for group in groups)):
        for group in groups:
            if rank < len(group):
                interleaved.append(group[rank])
    return list(dict.fromkeys(interleaved))[:limit]
def _parse_search_results(self, search_results_str: str) -> List[Dict]:
    """
    Turn the formatted search-result text into a list of result dicts.

    Each entry is expected as a ``**title**`` followed by its text and a
    ``[Read more](url)`` link. Entries lacking a title or a link are skipped.

    Returns:
        List[Dict]: Dicts with ``title``, ``url``, ``content`` and ``snippet``
        (``snippet`` repeats ``content``).
    """
    pieces = re.split(r'\*\*([^*]+)\*\*', search_results_str)
    parsed = []
    # pieces[0] is whatever precedes the first bold title; after that, titles
    # (odd indices) and their bodies (even indices) alternate.
    for raw_title, raw_body in zip(pieces[1::2], pieces[2::2]):
        heading = raw_title.strip()
        body = raw_body.strip()

        found = re.search(r'\[Read more\]\(([^)]+)\)', body)
        link = found.group(1) if found else ""
        if not (heading and link):
            continue

        # The entry text is the body with the link markup removed.
        text = re.sub(r'\[Read more\]\([^)]+\)', '', body).strip()
        parsed.append({
            'title': heading,
            'url': link,
            'content': text,
            'snippet': text
        })
    return parsed
def _filter_idsa_guidelines(self, search_results: List[Dict], topic: str) -> List[Dict]:
    """
    Keep the search results that are topic-relevant and IDSA-related.

    A result is kept when its title or content mentions a topic keyword AND
    it either comes from an IDSA-associated URL or mentions IDSA in its title
    or content.
    NOTE(review): the earlier inline comment described a stricter rule
    (official source AND IDSA indicator). The implemented OR rule is kept
    here - confirm which strictness is intended.

    Entries that are not dicts are skipped and missing / non-string fields
    are treated as empty text, so a single malformed entry cannot discard a
    whole batch of results.

    Args:
        search_results: Result dicts with ``url``, ``title`` and ``content``.
        topic: The topic the results must be relevant to.

    Returns:
        List[Dict]: Copies of the kept results, each with an added
        ``relevance_score``; the input dicts are not modified.
    """
    official_markers = (
        'idsociety.org',
        'idsa.org',
        'academic.oup.com/cid'  # Clinical Infectious Diseases journal (IDSA's official journal)
    )
    idsa_indicators = (
        'idsa', 'infectious diseases society of america', 'infectious diseases society',
        'idsa guideline', 'idsa guidelines', 'idsa clinical practice'
    )
    # The keyword list depends only on the topic, so compute it once.
    topic_keywords = self._get_topic_keywords(topic)

    def lowered(result: Dict, key: str) -> str:
        """Return the lower-cased string stored under ``key``, or '' if absent or not a string."""
        value = result.get(key)
        return value.lower() if isinstance(value, str) else ''

    def mentions(keyword: str, text: str) -> bool:
        """Tell whether ``text`` mentions ``keyword``."""
        if not keyword:
            return False
        if len(keyword) <= 3:
            # Short acronyms must match as whole words (optionally plural):
            # as a bare substring 'uti' also matches "routine" and
            # "therapeutic", which would make the relevance check meaningless.
            return re.search(rf'(?<!\w){re.escape(keyword)}s?(?!\w)', text) is not None
        return keyword in text

    filtered_results = []
    for result in search_results or []:
        if not isinstance(result, dict):
            continue
        url = lowered(result, 'url')
        title = lowered(result, 'title')
        content = lowered(result, 'content')

        is_official_idsa = any(marker in url for marker in official_markers)
        is_idsa_guideline = any(
            indicator in title or indicator in content for indicator in idsa_indicators
        )
        topic_relevant = any(
            mentions(keyword, title) or mentions(keyword, content)
            for keyword in topic_keywords
        )

        if topic_relevant and (is_official_idsa or is_idsa_guideline):
            # Score a copy so the caller's dict is left untouched.
            scored = dict(result)
            scored['relevance_score'] = self._calculate_relevance_score(result, topic)
            filtered_results.append(scored)
    return filtered_results
def _get_topic_keywords(self, topic: str) -> List[str]:
    """
    Build the lower-case keywords used to match a topic against result text.

    The list starts with the whole topic, then its individual words, then any
    known synonyms for the topic. It is de-duplicated while keeping order:
    callers add score per keyword, so a repeated keyword would be counted
    more than once.

    Words that carry no topical meaning (common stopwords, and tokens with
    fewer than two letters or digits such as the 'c.' in 'c. difficile') are
    not used as standalone keywords, because they match almost any text.

    Args:
        topic: The topic as supplied by the caller; case and surrounding
            whitespace are ignored.

    Returns:
        List[str]: Unique keywords, or an empty list for a blank topic.
    """
    # Specific synonyms and related terms
    keyword_mapping = {
        'tuberculosis': ['tuberculosis', 'tb', 'mycobacterium', 'pulmonary tb', 'latent tb', 'active tb'],
        'pneumonia': ['pneumonia', 'cap', 'hospital-acquired', 'ventilator-associated', 'lung infection'],
        'sepsis': ['sepsis', 'septic shock', 'bloodstream infection', 'bacteremia'],
        'meningitis': ['meningitis', 'cns infection', 'bacterial meningitis', 'brain infection'],
        'endocarditis': ['endocarditis', 'infective endocarditis', 'valve infection', 'heart infection'],
        'uti': ['urinary tract infection', 'uti', 'cystitis', 'pyelonephritis', 'bladder infection']
    }
    # 'tb' is treated as tuberculosis so it receives the same synonyms.
    topic_aliases = {'tb': 'tuberculosis'}
    stopwords = {'a', 'an', 'and', 'for', 'in', 'of', 'on', 'or', 'the', 'to', 'with'}

    normalized = ' '.join((topic or '').lower().split())
    if not normalized:
        # An empty keyword would be a substring of every text.
        return []

    keywords = [normalized]
    for word in normalized.split():
        if word in stopwords:
            continue
        if sum(ch.isalnum() for ch in word) < 2:
            continue
        keywords.append(word)

    canonical = topic_aliases.get(normalized, normalized)
    keywords.extend(keyword_mapping.get(canonical, []))

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(keywords))
def _calculate_relevance_score(self, result: Dict, topic: str) -> float:
    """
    Score a search result; a higher score means a better match.

    Contributions:
    - URL source: 20 for idsociety.org, else 18 for idsa.org, else 15 for
      academic.oup.com/cid (only the first match counts).
    - IDSA terms: 10 per term found in the title, else 5 if found in the content.
    - Guideline terms: 8 per term in the title, else 4 in the content.
    - Topic keywords: 6 per keyword in the title, else 2 in the content.
    - Recency: 2 if the title or content mentions any of the last five
      calendar years (counted back from today).

    Overlapping terms each contribute (a title containing "guidelines" also
    contains "guideline"); that weighting is kept as it was.

    Missing or non-string ``url`` / ``title`` / ``content`` values are
    treated as empty text.

    Args:
        result: Result dict with ``url``, ``title`` and ``content``.
        topic: Topic used to obtain the topic keywords.

    Returns:
        float: The accumulated score.
    """
    def lowered(key: str) -> str:
        """Return the lower-cased string stored under ``key``, or '' if absent or not a string."""
        value = result.get(key)
        return value.lower() if isinstance(value, str) else ''

    url = lowered('url')
    title = lowered('title')
    content = lowered('content')

    def weighted_hits(terms, title_weight: float, content_weight: float) -> float:
        """Sum the weight of every term, preferring a title hit over a content hit."""
        total = 0.0
        for term in terms:
            if term in title:
                total += title_weight
            elif term in content:
                total += content_weight
        return total

    score = 0.0

    # Official IDSA sources get the highest scores; only the first match applies.
    for marker, weight in (
        ('idsociety.org', 20.0),
        ('idsa.org', 18.0),
        ('academic.oup.com/cid', 15.0),
    ):
        if marker in url:
            score += weight
            break

    idsa_terms = ['idsa', 'infectious diseases society of america', 'infectious diseases society']
    score += weighted_hits(idsa_terms, 10.0, 5.0)

    guideline_terms = ['guideline', 'guidelines', 'clinical practice', 'recommendations']
    score += weighted_hits(guideline_terms, 8.0, 4.0)

    score += weighted_hits(self._get_topic_keywords(topic), 6.0, 2.0)

    # Recency window derived from today's date so it does not go stale.
    current_year = date.today().year
    recent_years = [str(current_year - offset) for offset in range(5)]
    if any(year in title or year in content for year in recent_years):
        score += 2.0

    return score
def _deduplicate_and_rank(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
    """
    Drop results that repeat a URL, then order the rest by relevance.

    For each URL the first result seen is the one kept. Results are returned
    from highest to lowest ``relevance_score`` (a missing score counts as 0);
    ties keep their original relative order. ``topic`` is not used.
    """
    first_by_url = {}
    for entry in guidelines_data:
        # setdefault stores the entry only when its URL has not been seen yet.
        first_by_url.setdefault(entry.get('url', ''), entry)

    return sorted(
        first_by_url.values(),
        key=lambda entry: entry.get('relevance_score', 0),
        reverse=True,
    )
def _extract_guideline_info(self, guidelines_data: List[Dict], topic: str) -> List[Dict]:
    """
    Convert raw guideline search results into structured guideline records.

    Each record carries the title, URL, publication year, authors, key
    recommendations, relevance score (0 when absent) and a short summary.
    A result whose processing raises is logged as a warning and left out.
    """
    records = []
    for entry in guidelines_data:
        try:
            heading = entry.get('title', '')
            link = entry.get('url', '')
            body = entry.get('content', '')

            year = self._extract_publication_year(heading, body)
            recs = self._extract_recommendations(body)
            writers = self._extract_authors(body)

            records.append({
                'title': heading,
                'url': link,
                'publication_year': year,
                'authors': writers,
                'key_recommendations': recs,
                'relevance_score': entry.get('relevance_score', 0),
                'summary': self._generate_summary(body, topic)
            })
        except Exception as e:
            logger.warning(f"Failed to process guideline: {e}")
    return records
def _extract_publication_year(self, title: str, content: str) -> str:
    """
    Find a four-digit 20xx year, looking in the title before the content.

    Returns the latest year found in the title; if the title has none, the
    latest year found in the content; otherwise "Unknown".
    """
    year_finder = re.compile(r'\b(20\d{2})\b')

    title_years = year_finder.findall(title)
    if title_years:
        return max(title_years)

    content_years = year_finder.findall(content)
    return max(content_years) if content_years else "Unknown"
def _extract_recommendations(self, content: str) -> List[str]:
    """
    Pull recommendation-like statements out of guideline text.

    Several patterns ("recommend(s) ...", "should ...", "we recommend ...",
    graded / strong recommendations) are tried in turn; each contributes at
    most its first three matches. A match runs up to the next period.

    The patterns overlap ("we recommend X" is also found by the plain
    "recommend" pattern), so matches are de-duplicated case-insensitively.
    Whitespace inside a match is collapsed to single spaces, and only matches
    longer than 20 and shorter than 200 characters are kept.

    Args:
        content: Guideline text; ``None`` or an empty string yields no
            recommendations.

    Returns:
        List[str]: Up to five unique recommendation strings, in the order found.
    """
    if not content:
        return []

    recommendation_patterns = [
        r'recommend[s]?\s+([^.]+)',
        r'should\s+([^.]+)',
        r'we\s+recommend\s+([^.]+)',
        r'grade\s+[AB]\s+recommendation[:\s]+([^.]+)',
        r'strong\s+recommendation[:\s]+([^.]+)'
    ]

    seen = set()
    cleaned_recommendations = []
    for pattern in recommendation_patterns:
        for raw in re.findall(pattern, content, re.IGNORECASE)[:3]:  # top 3 per pattern
            text = ' '.join(raw.split())
            key = text.lower()
            if 20 < len(text) < 200 and key not in seen:
                seen.add(key)
                cleaned_recommendations.append(text)

    return cleaned_recommendations[:5]
def _extract_authors(self, content: str) -> str:
    """
    Work out who authored a guideline from its text.

    If the text mentions the Infectious Diseases Society of America (by full
    name or as "idsa"), the full organisation name is returned. Otherwise the
    text following "author(ed) by" or "committee", up to the next period, is
    returned. When nothing matches the result is "IDSA".
    """
    organisation_patterns = (
        r'infectious\s+diseases\s+society\s+of\s+america',
        r'idsa',
    )
    capture_patterns = (
        r'authored?\s+by\s+([^.]+)',
        r'committee\s+([^.]+)',
    )

    # Any mention of the organisation wins over named authors.
    if any(re.search(p, content, re.IGNORECASE) for p in organisation_patterns):
        return "Infectious Diseases Society of America (IDSA)"

    for p in capture_patterns:
        hit = re.search(p, content, re.IGNORECASE)
        if hit:
            return hit.group(1).strip()

    return "IDSA"
def _generate_summary(self, content: str, topic: str) -> str:
    """
    Generate a brief summary of the guideline text.

    The text is split on periods. Among the first ten non-empty sentences,
    up to two that mention the topic (case-insensitive) are joined into the
    summary. If none mentions the topic, the first sentence is used. If the
    text has no sentences at all (``None``, empty, or only periods and
    whitespace) a generic placeholder is returned.

    Args:
        content: Guideline text.
        topic: Topic to look for in the sentences.

    Returns:
        str: The summary, always ending with a period.
    """
    fallback = "IDSA clinical practice guideline."

    # str.split never returns an empty list, so empty fragments have to be
    # filtered out explicitly for the fallback to be reachable.
    sentences = [part.strip() for part in (content or '').split('.')]
    sentences = [sentence for sentence in sentences if sentence]
    if not sentences:
        return fallback

    needle = (topic or '').lower()
    relevant_sentences = []
    for sentence in sentences[:10]:  # Check first 10 sentences
        if needle in sentence.lower():
            relevant_sentences.append(sentence)
            if len(relevant_sentences) >= 2:
                break

    if relevant_sentences:
        return '. '.join(relevant_sentences) + '.'
    # No topic-specific sentence: fall back to the opening sentence.
    return sentences[0] + '.'
def _generate_question_summary(self, guidelines: List[Dict], topic: str, specific_focus: str) -> str:
    """
    Compose a short, markdown-flavoured answer from the processed guidelines.

    The text opens with a context line, lists up to three key recommendations
    (at most two taken from each guideline), adds up to two longer sentences
    drawn from the first two guideline summaries, and appends an IGRA /
    QuantiFERON note for tuberculosis questions with that focus. The result
    is cut to 500 characters (ending in "...") when longer.

    Returns a fixed "no guidelines" sentence when ``guidelines`` is empty.
    """
    if not guidelines:
        return f"No IDSA guidelines found specifically addressing {topic}."

    question_context = f"{topic} {specific_focus}" if specific_focus else topic

    # Gather material from every guideline.
    collected_recs = []
    collected_summaries = []
    for entry in guidelines:
        collected_recs.extend(entry.get('key_recommendations', [])[:2])  # top 2 per guideline
        overview = entry.get('summary', '')
        if overview and len(overview) > 20:
            collected_summaries.append(overview)

    lines = [f"Based on IDSA guidelines for {question_context}:"]

    if collected_recs:
        lines.append("\n**Key Recommendations:**")
        lines.extend(
            f"{number}. {rec.strip()}"
            for number, rec in enumerate(collected_recs[:3], 1)  # at most 3 overall
        )

    if collected_summaries:
        lines.append("\n**Clinical Guidance:**")
        merged = ' '.join(collected_summaries[:2])  # first 2 summaries only
        # Keep the first two fragments that are long enough to be informative.
        candidates = [piece.strip() for piece in merged.split('.') if len(piece.strip()) > 30]
        for fragment in candidates[:2]:
            if fragment:
                lines.append(f"• {fragment}.")

    # Scenario-specific guidance
    focus_lower = (specific_focus or '').lower()
    is_tb_topic = topic.lower() in ['tuberculosis', 'tb']
    if is_tb_topic and ('quantiferon' in focus_lower or 'igra' in focus_lower):
        lines.append("\n**For undetermined IGRA/QuantiFERON results:** Consider clinical risk factors, repeat testing, or alternative diagnostic approaches as outlined in the guidelines.")

    answer = '\n'.join(lines)
    if len(answer) <= 500:
        return answer
    # Too long: keep 497 characters and mark the cut with an ellipsis.
    return answer[:497] + "..."