from langchain.tools import tool
from scholarly import scholarly, ProxyGenerator
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time

# Configure scholarly with a free proxy pool to reduce the risk of
# Google Scholar blocking repeated requests.
pg = ProxyGenerator()
pg.FreeProxies()
scholarly.use_proxy(pg)


@tool
def pmc_search(query: str) -> str:
    """Search PubMed Central (PMC) for articles."""
    try:
        # NCBI E-utilities endpoints for searching and fetching records
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

        # Search parameters: top 20 PMC hits, sorted by relevance
        search_params = {
            "db": "pmc",
            "term": query,
            "retmax": 20,
            "retmode": "json",
            "sort": "relevance",
        }

        # Get the list of matching article IDs
        response = requests.get(search_url, params=search_params)
        if not response.ok:
            return json.dumps([{"error": "PubMed search failed"}])

        try:
            search_data = response.json()
            article_ids = search_data.get("esearchresult", {}).get("idlist", [])
        except ValueError:
            # Fall back to XML parsing if the JSON response is malformed
            soup = BeautifulSoup(response.text, "xml")
            article_ids = [id_tag.text for id_tag in soup.find_all("Id")]

        articles = []
        for pmid in article_ids:
            try:
                # Fetch the full XML record for this article ID
                fetch_params = {
                    "db": "pmc",
                    "id": pmid,
                    "retmode": "xml",
                }
                article_response = requests.get(fetch_url, params=fetch_params)
                if not article_response.ok:
                    continue

                article_soup = BeautifulSoup(article_response.text, "xml")

                # Extract title, abstract, authors, year, and journal
                title_elem = article_soup.find("article-title")
                title = title_elem.text if title_elem else "No title"

                abstract_elem = article_soup.find("abstract")
                abstract = abstract_elem.text if abstract_elem else "No abstract"

                authors = []
                for author in article_soup.find_all(["author", "contrib"]):
                    surname = author.find(["surname", "last-name"])
                    given_name = author.find(["given-names", "first-name"])
                    if surname:
                        author_name = surname.text
                        if given_name:
                            author_name = f"{given_name.text} {author_name}"
                        authors.append(author_name)

                year_elem = article_soup.find(["pub-date", "year"])
                year = (
                    year_elem.find("year").text
                    if year_elem and year_elem.find("year")
                    else "Unknown"
                )

                journal_elem = article_soup.find(["journal-title", "source"])
                journal = journal_elem.text if journal_elem else "Unknown Journal"

                articles.append({
                    "id": pmid,
                    "title": title,
                    "authors": authors,
                    "year": year,
                    "journal": journal,
                    "abstract": abstract,
                })

                # Throttle requests to stay within NCBI E-utilities rate
                # limits (at most 3 requests/second without an API key)
                time.sleep(0.5)
            except Exception:
                continue

        return json.dumps(articles, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"PMC search failed: {str(e)}"}])


@tool
def google_scholar_search(query: str) -> str:
    """Search Google Scholar for articles."""
    try:
        # Re-apply the proxy configuration if the current one has gone stale
        if not scholarly.use_proxy(pg):
            pg.FreeProxies()
            scholarly.use_proxy(pg)

        search_query = scholarly.search_pubs(query)
        results = []
        count = 0
        max_retries = 3

        # Collect up to 20 usable publications from the result iterator
        while count < 20:
            try:
                result = next(search_query)

                # Extract publication data
                pub = {
                    "title": result.bib.get("title", "No title"),
                    "authors": result.bib.get("author", "No author").split(" and "),
                    "year": result.bib.get("year", "No year"),
                    "abstract": result.bib.get("abstract", "No abstract"),
                    "journal": result.bib.get(
                        "journal", result.bib.get("venue", "No venue")
                    ),
                    "citations": result.citedby if hasattr(result, "citedby") else 0,
                }

                # Skip results that lack a title or an abstract
                if pub["title"] == "No title" or pub["abstract"] == "No abstract":
                    continue

                results.append(pub)
                count += 1

                # Throttle requests to avoid rate limiting
                time.sleep(0.5)
            except StopIteration:
                break
            except Exception:
                # Back off briefly and retry a few times before giving up
                if max_retries > 0:
                    max_retries -= 1
                    time.sleep(1)
                    continue
                else:
                    break

        return json.dumps(results, indent=2)

    except Exception as e:
        return json.dumps([{"error": f"Google Scholar search failed: {str(e)}"}])


@tool
def today_tool() -> str:
    """Get today's date."""
    return str(datetime.date.today())
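

# --- Usage sketch (illustrative, not part of the tool definitions) ---------
# A minimal smoke test, assuming a LangChain release where @tool-decorated
# functions expose .invoke(); older releases use .run(...) instead. The
# query string is an arbitrary example, not taken from the original code.
if __name__ == "__main__":
    # Each tool returns a JSON string, so the output can be printed directly
    # or parsed with json.loads() for further processing.
    print(pmc_search.invoke("CRISPR base editing"))
    print(google_scholar_search.invoke("CRISPR base editing"))
    print(today_tool.invoke({}))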