import json
import random
import time
import html as html_lib
from typing import Optional

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()
POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"
# ---- Prompt templates ----
# STEP 1: Get the article structure
HEADLINES_PROMPT = """
You are an AI that produces a table of contents for a neutral, encyclopedic Wikipedia-style article.
Write about the topic: "{topic}".
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown or code fences.
Format exactly:
{{
"title": "string",
"lead": "string",
"sections": [
{{
"heading": "string",
"subsections": [
{{
"subheading": "string"
}}
]
}}
],
"last_edited": "string" /* optional */
}}
"""
# STEP 2: Get all content for the structure in a single call
ARTICLE_PROMPT = """
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article.
The topic is "{topic}".
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure.
Instructions:
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic.
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure.
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection.
Input Structure:
{structure_json}
Output Format Example:
{{
"sections": [
{{
"heading": "History",
"content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.",
"subsections": [
{{
"subheading": "Early developments",
"content": "In the early days, developments were slow and often theoretical..."
}}
]
}}
]
}}
"""
# ---- In-memory raw log (topic -> list of (prompt, response)) ----
RAW_LOG = {}
# ---- Utility functions ----
def call_pollinations(prompt: str) -> str:
"""Call Pollinations and return the raw text response (no stripping)."""
    uri = (
        POLLINATIONS_URL
        + requests.utils.requote_uri(prompt)
        + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true"
        + "&seed=" + str(random.randint(0, 999999))  # random seed to avoid cached responses
    )
    r = requests.get(uri, timeout=60)  # generous timeout for long generations
    r.raise_for_status()
    return r.text  # preserve the raw body; JSON extraction happens downstream
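
# A minimal retry sketch (an assumption, not wired into the routes below):
# free text endpoints can fail transiently, so a caller could wrap
# call_pollinations like this. The attempt count and backoff are illustrative.
def call_pollinations_with_retry(prompt: str, attempts: int = 3) -> str:
    """Retry call_pollinations with simple exponential backoff."""
    for attempt in range(attempts):
        try:
            return call_pollinations(prompt)
        except requests.RequestException:
            if attempt == attempts - 1:
                raise  # out of attempts; surface the original error
            time.sleep(2 ** attempt)  # back off: 1s, 2s, ...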
def extract_json(text: str) -> dict:
"""Extract and parse the first JSON object found between first '{' and last '}'."""
start = text.find("{")
end = text.rfind("}") + 1
if start == -1 or end == -1 or start >= end:
raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
json_str = text[start:end]
try:
return json.loads(json_str)
except Exception as e:
raise ValueError(f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}\n\nRaw (truncated):\n{text[:2000]}")
def log_raw(topic: str, prompt: str, response: str):
"""Append a prompt/response pair to RAW_LOG for the topic."""
RAW_LOG.setdefault(topic, []).append((prompt, response))
# ---- Two-step generation functions ----
def generate_headlines(topic: str) -> dict:
"""Step 1: Get the article structure (TOC)."""
prompt = HEADLINES_PROMPT.format(topic=topic)
resp = call_pollinations(prompt)
log_raw(topic, prompt, resp)
data = extract_json(resp)
    # Normalize the structure so the renderer can rely on these keys existing
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
return data
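# Illustrative shape after normalization (values are made up):
#   {"title": "Example Topic", "lead": "Example Topic is ...",
#    "sections": [{"heading": "History",
#                  "subsections": [{"subheading": "Origins"}]}]}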
def generate_article_content(topic: str, toc_structure: dict) -> dict:
"""Step 2: Generate all content for the given structure in one call."""
# Create a clean version of the structure for the prompt
structure_for_prompt = {
"sections": [
{
"heading": s.get("heading"),
"subsections": s.get("subsections", [])
} for s in toc_structure.get("sections", [])
]
}
structure_json = json.dumps(structure_for_prompt, indent=2)
prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
resp = call_pollinations(prompt)
log_raw(topic, prompt, resp)
data = extract_json(resp)
return data
# ---- Renderer ----
def esc(s):
    """HTML-escape a string; return '' for non-string values."""
    return html_lib.escape(s) if isinstance(s, str) else ""
def render_page(article: dict, execution_time: Optional[float] = None) -> str:
"""Render final HTML page from the fully-populated article JSON."""
title = esc(article.get("title", "Untitled"))
lead = esc(article.get("lead", ""))
css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""
parts = [
"<!doctype html><html lang='en'><head><meta charset='utf-8'>",
f"<title>{title} - Wikipedai</title>",
"<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
"<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
"<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
"<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
]
if lead: parts.append(f"<p><strong>{lead}</strong></p>")
if article.get("sections"):
parts.append("<div id='toc'><h2>Contents</h2><ul>")
for i, sec in enumerate(article.get("sections", []), 1):
parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
if sec.get("subsections"):
parts.append("<ul>")
for j, sub in enumerate(sec.get("subsections", []), 1):
parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
parts.append("</ul>")
parts.append("</ul></div>")
for i, sec in enumerate(article.get("sections", []), 1):
parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
if sec.get("content"): parts.append(f"<p>{esc(sec.get('content',''))}</p>")
for j, sub in enumerate(sec.get("subsections", []) or [], 1):
parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
if sub.get("content"): parts.append(f"<p>{esc(sub.get('content',''))}</p>")
footer_parts = []
if article.get("last_edited"): footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
if execution_time is not None: footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
footer_content = " • ".join(footer_parts)
parts.append(f"</main><footer>{footer_content}</footer></div></div></body></html>")
js = """
<script>
document.getElementById('search_bar').addEventListener('keydown', function(event) {
// Check if the key pressed was 'Enter'
if (event.key === 'Enter') {
// Prevent any default action
event.preventDefault();
// Get the user's query from the input field
const query = document.getElementById('search_bar').value;
// If the query is empty, do nothing
if (!query) {
return;
}
// URI-encode the query to handle special characters safely
const encodedQuery = encodeURIComponent(query);
// Construct the final URL for the API
const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;
// Redirect the browser to the API URL
window.location.href = apiUrl;
}
});
</script>
"""
    parts.append(js)
    parts.append("</body></html>")  # close the document after the script so the markup stays valid
return "\n".join(parts)
# ---- API Routes ----
@app.get("/wikipedai/{topic}", response_class=HTMLResponse)
def wikipedai(topic: str):
start_time = time.time()
RAW_LOG[topic] = []
try:
# Step 1: Get the article structure (title, lead, headings)
article_structure = generate_headlines(topic)
# Step 2: Get all content for that structure in a single API call
article_content = generate_article_content(topic, article_structure)
        # Step 3: Merge the content back into the original structure.
        # This relies on the model keeping the input section order, which the
        # prompt instructs; the index checks below skip anything missing.
content_sections = article_content.get("sections", [])
for i, section_structure in enumerate(article_structure.get("sections", [])):
if i < len(content_sections):
# Add content to the main section
section_structure["content"] = content_sections[i].get("content", "[Content not generated]")
# Add content to subsections
content_subsections = content_sections[i].get("subsections", [])
for j, sub_structure in enumerate(section_structure.get("subsections", [])):
if j < len(content_subsections):
sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]")
# Final render
elapsed_time = time.time() - start_time
html = render_page(article_structure, execution_time=elapsed_time)
return HTMLResponse(content=html, status_code=200)
except Exception as e:
# Capture the full traceback for better debugging
import traceback
error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}"
return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500)
@app.get("/raw/{topic}", response_class=PlainTextResponse)
def raw(topic: str):
entries = RAW_LOG.get(topic, [])
if not entries:
return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404)
out_lines = []
for idx, (prompt, resp) in enumerate(entries, start=1):
out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n")
return PlainTextResponse("\n".join(out_lines), status_code=200)