import html as html_lib
import json
import random
import time
from typing import Optional

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()

POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"

# ---- Prompt templates ----

# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".

Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown, or code fences.

Format exactly:
{{
  "title": "string",
  "lead": "string",
  "sections": [
    {{
      "heading": "string",
      "subsections": [
        {{ "subheading": "string" }}
      ]
    }}
  ],
  "last_edited": "string" /* optional */
}}
"""

# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}". You have been given a JSON structure containing headings and subheadings.
Your task is to write the content for this structure.

Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences, to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.

Input Structure:
{structure_json}

Output Format Example:
{{
  "sections": [
    {{
      "heading": "History",
      "content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
      "subsections": [
        {{
          "subheading": "Early developments",
          "content": "In the early days, developments were slow and often theoretical..."
        }}
      ]
    }}
  ]
}}
"""
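
# Illustrative sketch, not part of the original flow: a lightweight shape check
# for the Step-1 JSON. The function name and the required keys are assumptions
# derived from HEADLINES_PROMPT above, not original code.
def looks_like_structure(data: dict) -> bool:
    """Return True if `data` has the fields the prompt contract promises."""
    sections = data.get("sections")
    if not isinstance(sections, list):
        return False
    return all(isinstance(s, dict) and "heading" in s for s in sections)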
# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}


# ---- Utility functions ----

def call_pollinations(prompt: str) -> str:
    """Call Pollinations and return the raw text response (no stripping)."""
    uri = (
        POLLINATIONS_URL
        + requests.utils.requote_uri(prompt)
        + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true&seed="
        + str(random.randint(0, 999999))
    )
    r = requests.get(uri, timeout=60)  # generous timeout for larger generations
    r.raise_for_status()
    return r.text  # preserve raw


def extract_json(text: str) -> dict:
    """Extract and parse the first JSON object found between the first '{' and the last '}'."""
    start = text.find("{")
    end = text.rfind("}") + 1  # rfind returns -1 when missing, so end == 0 means no '}'
    if start == -1 or end == 0 or start >= end:
        raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
    json_str = text[start:end]
    try:
        return json.loads(json_str)
    except Exception as e:
        raise ValueError(
            f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}"
            f"\n\nRaw (truncated):\n{text[:2000]}"
        )


def log_raw(topic: str, prompt: str, response: str):
    """Append a prompt/response pair to RAW_LOG for the topic."""
    RAW_LOG.setdefault(topic, []).append((prompt, response))


# ---- Two-step generation functions ----

def generate_headlines(topic: str) -> dict:
    """Step 1: Get the article structure (TOC)."""
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    # Normalize structure
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
    return data


def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: Generate all content for the given structure in one call."""
    # Create a clean version of the structure for the prompt
    structure_for_prompt = {
        "sections": [
            {"heading": s.get("heading"), "subsections": s.get("subsections", [])}
            for s in toc_structure.get("sections", [])
        ]
    }
    structure_json = json.dumps(structure_for_prompt, indent=2)
    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    return extract_json(resp)
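
# Illustrative sketch, not wired into the routes: the public Pollinations
# endpoint can fail transiently, so a caller might wrap call_pollinations in a
# simple retry loop. The helper name and the attempt/backoff parameters are
# assumptions, not part of the original design.
def call_with_retries(prompt: str, attempts: int = 3, backoff: float = 2.0) -> str:
    """Call call_pollinations, retrying network errors with linear backoff."""
    for attempt in range(1, attempts + 1):
        try:
            return call_pollinations(prompt)
        except requests.RequestException:
            if attempt == attempts:
                raise
            time.sleep(backoff * attempt)
    raise RuntimeError("unreachable")  # the loop always returns or raises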
# ---- Renderer ----

def esc(s):
    return html_lib.escape(s) if isinstance(s, str) else ""


def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render the final HTML page from the fully-populated article JSON."""
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))
    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""
    # NOTE: the exact markup did not survive in the source file; the tags and
    # anchor ids below are a reconstruction consistent with the selectors
    # defined in `css` (container, left-sidebar, header, toc, footer).
    parts = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        "<meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        f"<style>{css}</style>",
        "</head>",
        "<body>",
        "<div id='container'>",
        "<div id='left-sidebar'><b>Wikipedai</b></div>",
        "<div id='main-content'>",
        "<header><nav><a href='#'>Article</a> <a href='#'>Talk</a></nav></header>",
        f"<h1 id='main-title'>{title}</h1>",
        "<p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]
    if lead:
        parts.append(f"<p>{lead}</p>")
    if article.get("sections"):
        toc_items = "".join(
            f"<li><a href='#section-{i}'>{esc(sec.get('heading',''))}</a></li>"
            for i, sec in enumerate(article.get("sections", []), 1)
        )
        parts.append(f"<div id='toc'><b>Contents</b><ol>{toc_items}</ol></div>")
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='section-{i}'>{esc(sec.get('heading',''))}</h2>")
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='section-{i}-{j}'>{esc(sub.get('subheading',''))}</h3>")
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content',''))}</p>")
    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)
    parts.append(f"<footer>{footer_content}</footer>")
    parts.append("</div>")  # close #main-content
    parts.append("</div>")  # close #container
    js = ""  # placeholder: the original client-side script did not survive in the source
    parts.append(js)
    parts.append("</body>")
    parts.append("</html>")
    return "\n".join(parts)
") js = """ """ parts.append(js) return "\n".join(parts) # ---- API Routes ---- @app.get("/wikipedai/{topic}", response_class=HTMLResponse) def wikipedai(topic: str): start_time = time.time() RAW_LOG[topic] = [] try: # Step 1: Get the article structure (title, lead, headings) article_structure = generate_headlines(topic) # Step 2: Get all content for that structure in a single API call article_content = generate_article_content(topic, article_structure) # Step 3: Merge the content back into the original structure # This assumes the AI returned the sections in the same order, which it should. content_sections = article_content.get("sections", []) for i, section_structure in enumerate(article_structure.get("sections", [])): if i < len(content_sections): # Add content to the main section section_structure["content"] = content_sections[i].get("content", "[Content not generated]") # Add content to subsections content_subsections = content_sections[i].get("subsections", []) for j, sub_structure in enumerate(section_structure.get("subsections", [])): if j < len(content_subsections): sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]") # Final render elapsed_time = time.time() - start_time html = render_page(article_structure, execution_time=elapsed_time) return HTMLResponse(content=html, status_code=200) except Exception as e: # Capture the full traceback for better debugging import traceback error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}" return HTMLResponse(content=f"

Error

{html_lib.escape(error_details)}
", status_code=500) @app.get("/raw/{topic}", response_class=PlainTextResponse) def raw(topic: str): entries = RAW_LOG.get(topic, []) if not entries: return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404) out_lines = [] for idx, (prompt, resp) in enumerate(entries, start=1): out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n") return PlainTextResponse("\n".join(out_lines), status_code=200)