import re
import json
import requests
import html as html_lib
import time
from typing import Optional
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse
import random

app = FastAPI()

POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"

# ---- Prompt templates ----

# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents, for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown or code fences.
Format exactly:
{{
  "title": "string",
  "lead": "string",
  "sections": [
    {{
      "heading": "string",
      "subsections": [
        {{
          "subheading": "string"
        }}
      ]
    }}
  ],
  "last_edited": "string"   /* optional */
}}
"""

# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.

Instructions:
1.  **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2.  **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3.  **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.

Input Structure:
{structure_json}

Output Format Example:
{{
  "sections": [
    {{
      "heading": "History",
      "content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
      "subsections": [
        {{
          "subheading": "Early developments",
          "content": "In the early days, developments were slow and often theoretical..."
        }}
      ]
    }}
  ]
}}
"""

# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}

# ---- Utility functions ----
def call_pollinations(prompt: str) -> str:
    """Call Pollinations and return the raw text response (no stripping)."""
    uri = POLLINATIONS_URL + requests.utils.requote_uri(prompt) + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true&seed="+str(random.randint(0,999999))
    r = requests.get(uri, timeout=60) # Increased timeout for larger generation
    r.raise_for_status()
    return r.text  # preserve raw

def extract_json(text: str) -> dict:
    """Extract and parse the first JSON object found between first '{' and last '}'."""
    start = text.find("{")
    end = text.rfind("}") + 1
    if start == -1 or end == -1 or start >= end:
        raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
    json_str = text[start:end]
    try:
        return json.loads(json_str)
    except Exception as e:
        raise ValueError(f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}\n\nRaw (truncated):\n{text[:2000]}")

def log_raw(topic: str, prompt: str, response: str):
    """Append a prompt/response pair to RAW_LOG for the topic."""
    RAW_LOG.setdefault(topic, []).append((prompt, response))

# ---- Two-step generation functions ----
def generate_headlines(topic: str) -> dict:
    """Step 1: Get the article structure (TOC)."""
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    # Normalize structure
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", data.get("lead", ""))
    data.setdefault("sections", data.get("sections", []))
    return data

def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: Generate all content for the given structure in one call."""
    # Create a clean version of the structure for the prompt
    structure_for_prompt = {
        "sections": [
            {
                "heading": s.get("heading"),
                "subsections": s.get("subsections", [])
            } for s in toc_structure.get("sections", [])
        ]
    }
    structure_json = json.dumps(structure_for_prompt, indent=2)
    
    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    return data

# ---- Renderer ----
def esc(s): return html_lib.escape(s) if isinstance(s, str) else ""

def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render final HTML page from the fully-populated article JSON."""
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))
    
    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""

    parts = [
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        "<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
        f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
        "<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
        "<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
        "<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
        f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]

    if lead: parts.append(f"<p><strong>{lead}</strong></p>")

    if article.get("sections"):
        parts.append("<div id='toc'><h2>Contents</h2><ul>")
        for i, sec in enumerate(article.get("sections", []), 1):
            parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
            if sec.get("subsections"):
                parts.append("<ul>")
                for j, sub in enumerate(sec.get("subsections", []), 1):
                    parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
                parts.append("</ul>")
        parts.append("</ul></div>")


    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
        if sec.get("content"): parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
            if sub.get("content"): parts.append(f"<p>{esc(sub.get('content',''))}</p>")

    footer_parts = []
    if article.get("last_edited"): footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None: footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)

    parts.append(f"</main><footer>{footer_content}</footer></div></div></body></html>")

    js = """
        <script>
        document.getElementById('search_bar').addEventListener('keydown', function(event) {
            // Check if the key pressed was 'Enter'
            if (event.key === 'Enter') {
                // Prevent any default action
                event.preventDefault();

                // Get the user's query from the input field
                const query = document.getElementById('search_bar').value;

                // If the query is empty, do nothing
                if (!query) {
                    return;
                }

                // URI-encode the query to handle special characters safely
                const encodedQuery = encodeURIComponent(query);

                // Construct the final URL for the API
                const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;

                // Redirect the browser to the API URL
                window.location.href = apiUrl;
            }
        });
        </script>
    """

    parts.append(js)
    return "\n".join(parts)

# ---- API Routes ----

@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
    start_time = time.time()
    RAW_LOG[topic] = []

    try:
        # Step 1: Get the article structure (title, lead, headings)
        article_structure = generate_headlines(topic)

        # Step 2: Get all content for that structure in a single API call
        article_content = generate_article_content(topic, article_structure)
        
        # Step 3: Merge the content back into the original structure
        # This assumes the AI returned the sections in the same order, which it should.
        content_sections = article_content.get("sections", [])
        for i, section_structure in enumerate(article_structure.get("sections", [])):
            if i < len(content_sections):
                # Add content to the main section
                section_structure["content"] = content_sections[i].get("content", "[Content not generated]")
                
                # Add content to subsections
                content_subsections = content_sections[i].get("subsections", [])
                for j, sub_structure in enumerate(section_structure.get("subsections", [])):
                    if j < len(content_subsections):
                        sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]")

        # Final render
        elapsed_time = time.time() - start_time
        html = render_page(article_structure, execution_time=elapsed_time)
        return HTMLResponse(content=html, status_code=200)

    except Exception as e:
        # Capture the full traceback for better debugging
        import traceback
        error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
        return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500)

@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
    entries = RAW_LOG.get(topic, [])
    if not entries:
        return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)

    out_lines = []
    for idx, (prompt, resp) in enumerate(entries, start=1):
        out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n")
    return PlainTextResponse("\n".join(out_lines), status_code=200)