File size: 6,941 Bytes
aaa7944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from langchain.tools import tool
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
import re

def clean_text(text):
    """Clean text from HTML tags and extra whitespace"""
    if not text:
        return ""
    text = re.sub(r'<[^>]+>', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    try:
        # Base URLs for PubMed APIs
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        
        # Search parameters
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }
        
        # Get article IDs
        response = requests.get(search_url, params=search_params)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])
            
        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except:
            # Fallback to XML parsing if JSON fails
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id.text for id in soup.find_all('Id')]
        
        articles = []
        for pmid in article_ids:
            try:
                # Fetch article details
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params)
                if not article_response.ok:
                    continue
                    
                article_soup = BeautifulSoup(article_response.text, 'xml')
                
                # Extract article data
                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")
                
                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem else "No abstract")
                
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))
                
                year_elem = article_soup.find(["pub-date", "year"])
                year = year_elem.find("year").text if year_elem and year_elem.find("year") else "Unknown"
                
                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem else "Unknown Journal")
                
                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })
                
                # Add delay to avoid rate limiting
                time.sleep(0.5)
                
            except Exception as e:
                continue
        
        return json.dumps(articles, indent=2)
        
    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])

@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature"""
    try:
        # Use alternative medical literature sources
        search_urls = [
            f"https://europepmc.org/webservices/rest/search?query={query}&format=json&pageSize=20",
            f"https://api.semanticscholar.org/graph/v1/paper/search?query={query}&limit=20&fields=title,abstract,year,authors,venue"
        ]
        
        results = []
        
        for url in search_urls:
            try:
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                response = requests.get(url, headers=headers)
                
                if not response.ok:
                    continue
                
                data = response.json()
                
                # Handle Europe PMC response
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title", "No title")),
                            "authors": [clean_text(author.get("fullName", "Unknown")) for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear", "Unknown"),
                            "journal": clean_text(result.get("journalTitle", "Unknown Journal")),
                            "abstract": clean_text(result.get("abstractText", "No abstract")),
                            "source": "Europe PMC"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)
                
                # Handle Semantic Scholar response
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title", "No title")),
                            "authors": [clean_text(author.get("name", "Unknown")) for author in paper.get("authors", [])],
                            "year": paper.get("year", "Unknown"),
                            "journal": clean_text(paper.get("venue", "Unknown Journal")),
                            "abstract": clean_text(paper.get("abstract", "No abstract")),
                            "source": "Semantic Scholar"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)
                
                time.sleep(1)  # Rate limiting
                
            except Exception as e:
                continue
        
        return json.dumps(results, indent=2)
        
    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])

@tool
def today_tool() -> str:
    """Get today's date"""
    return str(datetime.date.today())