from langchain.tools import tool
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
import re
import urllib.parse

# Network timeout (seconds) for all outbound HTTP calls; without one a
# stalled server would hang the tool indefinitely.
REQUEST_TIMEOUT = 30


def clean_text(text):
    """Clean text from HTML tags and extra whitespace.

    Args:
        text: Raw string that may contain HTML markup; may be None/empty.

    Returns:
        The text with anything that looks like an HTML/XML tag removed and
        all whitespace runs collapsed to single spaces. Empty string for
        falsy input.
    """
    if not text:
        return ""
    text = re.sub(r'<[^>]+>', '', text)   # drop <...> markup tags
    text = re.sub(r'\s+', ' ', text)      # collapse whitespace runs
    return text.strip()


@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles"""
    # Returns a JSON array of article dicts (id/title/authors/year/journal/
    # abstract), or a one-element array with an "error" key on failure.
    try:
        # Base URLs for the NCBI E-utilities (esearch finds IDs, efetch
        # retrieves full records).
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance"
        }

        # Get the list of matching article IDs.
        response = requests.get(search_url, params=search_params,
                                timeout=REQUEST_TIMEOUT)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # NCBI occasionally answers XML even when JSON was requested;
            # fall back to pulling <Id> elements out of the XML.
            soup = BeautifulSoup(response.text, 'xml')
            article_ids = [id_node.text for id_node in soup.find_all('Id')]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch the full XML record for this article.
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml"
                }
                article_response = requests.get(fetch_url, params=fetch_params,
                                                timeout=REQUEST_TIMEOUT)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, 'xml')

                # Title and abstract, with placeholders when absent.
                title_elem = article_soup.find("article-title")
                title = clean_text(title_elem.text if title_elem else "No title")

                abstract_elem = article_soup.find("abstract")
                abstract = clean_text(abstract_elem.text if abstract_elem
                                      else "No abstract")

                # Author names: "<given> <surname>" when both are present.
                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(clean_text(author_name))

                year_elem = article_soup.find(["pub-date", "year"])
                year = (year_elem.find("year").text
                        if year_elem and year_elem.find("year") else "Unknown")

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = clean_text(journal_elem.text if journal_elem
                                     else "Unknown Journal")

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract
                })

                # Add delay to avoid NCBI rate limiting.
                time.sleep(0.5)

            except Exception:
                # Best-effort: skip any article whose record fails to
                # download or parse rather than aborting the whole search.
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])


@tool
def google_scholar_search(query: str) -> str:
    """Search alternative sources for medical literature"""
    # Queries Europe PMC and Semantic Scholar and merges the hits into one
    # JSON array; entries lacking a title or abstract are dropped.
    try:
        # Percent-encode the query: raw interpolation breaks the URL for
        # queries containing spaces or reserved characters.
        encoded_query = urllib.parse.quote(query)
        search_urls = [
            f"https://europepmc.org/webservices/rest/search?query={encoded_query}&format=json&pageSize=20",
            f"https://api.semanticscholar.org/graph/v1/paper/search?query={encoded_query}&limit=20&fields=title,abstract,year,authors,venue"
        ]

        results = []
        for url in search_urls:
            try:
                # Browser-like UA: some endpoints reject default clients.
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                response = requests.get(url, headers=headers,
                                        timeout=REQUEST_TIMEOUT)
                if not response.ok:
                    continue

                data = response.json()

                # Handle Europe PMC response shape.
                if "resultList" in data:
                    for result in data["resultList"].get("result", []):
                        pub = {
                            "title": clean_text(result.get("title", "No title")),
                            "authors": [clean_text(author.get("fullName", "Unknown"))
                                        for author in result.get("authorList", {}).get("author", [])],
                            "year": result.get("pubYear", "Unknown"),
                            "journal": clean_text(result.get("journalTitle", "Unknown Journal")),
                            "abstract": clean_text(result.get("abstractText", "No abstract")),
                            "source": "Europe PMC"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                # Handle Semantic Scholar response shape.
                elif "data" in data:
                    for paper in data["data"]:
                        pub = {
                            "title": clean_text(paper.get("title", "No title")),
                            "authors": [clean_text(author.get("name", "Unknown"))
                                        for author in paper.get("authors", [])],
                            "year": paper.get("year", "Unknown"),
                            "journal": clean_text(paper.get("venue", "Unknown Journal")),
                            "abstract": clean_text(paper.get("abstract", "No abstract")),
                            "source": "Semantic Scholar"
                        }
                        if pub["title"] != "No title" and pub["abstract"] != "No abstract":
                            results.append(pub)

                time.sleep(1)  # Rate limiting between providers

            except Exception:
                # Best-effort aggregation: a failing provider is skipped so
                # the other source can still contribute results.
                continue

        return json.dumps(results, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"Literature search failed: {str(e)}"}])


@tool
def today_tool() -> str:
    """Get today's date"""
    # ISO-format date string, e.g. "2024-05-01".
    return str(datetime.date.today())