import html as html_lib
import json
import random
import time
import traceback
from typing import Optional

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()
POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"
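# Requests append the URL-encoded prompt to the path, plus query parameters,
# e.g. (illustrative shape only; token elided, seed varies per call):
#   https://text.pollinations.ai/prompt/Hello%20world?token=...&model=gemini&json=true&seed=123456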
# ---- Prompt templates ----
# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown or code fences.
Format exactly:
{{
"title": "string",
"lead": "string",
"sections": [
{{
"heading": "string",
"subsections": [
{{
"subheading": "string"
}}
]
}}
],
"last_edited": "string" /* optional */
}}
"""
# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.
Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.
Input Structure:
{structure_json}
Output Format Example:
{{
"sections": [
{{
"heading": "History",
"content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
"subsections": [
{{
"subheading": "Early developments",
"content": "In the early days, developments were slow and often theoretical..."
}}
]
}}
]
}}
"""
# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}
# ---- Utility functions ----
def call_pollinations(prompt: str) -> str:
"""Call Pollinations and return the raw text response (no stripping)."""
    # Pollinations expects the prompt in the URL path; quote it fully so
    # characters like '&' or '?' in the prompt cannot corrupt the query string.
    seed = random.randint(0, 999999)
    uri = (
        POLLINATIONS_URL
        + requests.utils.quote(prompt, safe="")
        + f"?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true&seed={seed}"
    )
    r = requests.get(uri, timeout=60)  # generous timeout for large generations
r.raise_for_status()
return r.text # preserve raw
def extract_json(text: str) -> dict:
"""Extract and parse the first JSON object found between first '{' and last '}'."""
start = text.find("{")
end = text.rfind("}") + 1
    if start == -1 or end == 0 or start >= end:  # rfind returns -1, so a missing '}' makes end == 0
raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
json_str = text[start:end]
try:
return json.loads(json_str)
except Exception as e:
raise ValueError(f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}\n\nRaw (truncated):\n{text[:2000]}")
def log_raw(topic: str, prompt: str, response: str):
"""Append a prompt/response pair to RAW_LOG for the topic."""
RAW_LOG.setdefault(topic, []).append((prompt, response))
# ---- Two-step generation functions ----
def generate_headlines(topic: str) -> dict:
"""Step 1: Get the article structure (TOC)."""
prompt = HEADLINES_PROMPT.format(topic=topic)
resp = call_pollinations(prompt)
log_raw(topic, prompt, resp)
data = extract_json(resp)
    # Normalize the structure so downstream code can rely on these keys
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
return data
def generate_article_content(topic: str, toc_structure: dict) -> dict:
"""Step 2: Generate all content for the given structure in one call."""
# Create a clean version of the structure for the prompt
structure_for_prompt = {
"sections": [
{
"heading": s.get("heading"),
"subsections": s.get("subsections", [])
} for s in toc_structure.get("sections", [])
]
}
structure_json = json.dumps(structure_for_prompt, indent=2)
prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
resp = call_pollinations(prompt)
log_raw(topic, prompt, resp)
data = extract_json(resp)
return data
# ---- Renderer ----
def esc(s) -> str:
    """HTML-escape a string; coerce any non-string value to the empty string."""
    return html_lib.escape(s) if isinstance(s, str) else ""
def render_page(article: dict, execution_time: Optional[float] = None) -> str:
"""Render final HTML page from the fully-populated article JSON."""
title = esc(article.get("title", "Untitled"))
lead = esc(article.get("lead", ""))
css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""
    # Document head, sidebar, page header, and lead paragraph
    parts = [f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{title}</title>
<style>{css}</style>
</head>
<body>
<div id="container">
<div id="left-sidebar"><strong>WikipedAI</strong></div>
<div id="main-content">
<header><h1 id="main-title">{title}</h1></header>
<p class="site-sub">From WikipedAI, the AI-generated encyclopedia</p>
<p>{lead}</p>"""]
    # Table of contents linking to each numbered section heading
    toc_items = "".join(
        f'<li><a href="#sec-{i}">{esc(s.get("heading", ""))}</a></li>'
        for i, s in enumerate(article.get("sections", []), 1)
    )
    parts.append(f'<div id="toc"><strong>Contents</strong><ol>{toc_items}</ol></div>')
    # Section and subsection bodies
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f'<h2 id="sec-{i}">{esc(sec.get("heading", ""))}</h2>')
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content', ''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f'<h3 id="sec-{i}-{j}">{esc(sub.get("subheading", ""))}</h3>')
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content', ''))}</p>")
    # Footer with last-edited date and generation time, when available
    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)
    parts.append(f"<footer>{footer_content}</footer>")
    parts.append("</div></div></body></html>")
    return "\n".join(parts)
# ---- API Routes ----
@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
start_time = time.time()
RAW_LOG[topic] = []
try:
# Step 1: Get the article structure (title, lead, headings)
article_structure = generate_headlines(topic)
# Step 2: Get all content for that structure in a single API call
article_content = generate_article_content(topic, article_structure)
        # Step 3: Merge the content back into the original structure.
        # Merging by index assumes the model preserved section order, as the prompt instructs.
content_sections = article_content.get("sections", [])
for i, section_structure in enumerate(article_structure.get("sections", [])):
if i < len(content_sections):
# Add content to the main section
section_structure["content"] = content_sections[i].get("content", "[Content not generated]")
# Add content to subsections
content_subsections = content_sections[i].get("subsections", [])
for j, sub_structure in enumerate(section_structure.get("subsections", [])):
if j < len(content_subsections):
sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]")
# Final render
elapsed_time = time.time() - start_time
html = render_page(article_structure, execution_time=elapsed_time)
return HTMLResponse(content=html, status_code=200)
    except Exception as e:
        # Include the full traceback so failures are debuggable from the browser
        error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
        return HTMLResponse(
            content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>",
            status_code=500,
        )
@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
entries = RAW_LOG.get(topic, [])
if not entries:
return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)
out_lines = []
for idx, (prompt, resp) in enumerate(entries, start=1):
out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n")
return PlainTextResponse("\n".join(out_lines), status_code=200)