import html as html_lib
import json
import random
import re
import time
from typing import Optional

import requests
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, PlainTextResponse

app = FastAPI()

POLLINATIONS_URL = "https://text.pollinations.ai/prompt/"

HEADLINES_PROMPT = """ |
|
You are an AI that produces a table of contents, for a neutral, encyclopedic Wikipedia-style article. |
|
Write about the topic: "{topic}". |
|
Output ONLY valid JSON and NOTHING else. Do not add explanatory text, headers, markdown or code fences. |
|
Format exactly: |
|
{{ |
|
"title": "string", |
|
"lead": "string", |
|
"sections": [ |
|
{{ |
|
"heading": "string", |
|
"subsections": [ |
|
{{ |
|
"subheading": "string" |
|
}} |
|
] |
|
}} |
|
], |
|
"last_edited": "string" /* optional */ |
|
}} |
|
""" |
|
|
|
|
|
ARTICLE_PROMPT = """ |
|
You are an AI that writes a complete, neutral, and detailed encyclopedic Wikipedia-style article. |
|
The topic is "{topic}". |
|
You have been given a JSON structure containing headings and subheadings. Your task is to write the content for this structure. |
|
|
|
Instructions: |
|
1. **Content Depth:** Write a detailed paragraph for each heading and subheading. Paragraphs for the main headings should be especially comprehensive, consisting of several sentences to provide a thorough overview of the section's topic. |
|
2. **Structure:** Do not invent new sections. Stick strictly to the provided input structure. |
|
3. **Output Format:** Output ONLY a valid JSON object and NOTHING else. The output JSON must have the exact same structure as the input, but with a "content" field added to each section and subsection. |
|
|
|
Input Structure: |
|
{structure_json} |
|
|
|
Output Format Example: |
|
{{ |
|
"sections": [ |
|
{{ |
|
"heading": "History", |
|
"content": "The history of the topic is long and varied, with early concepts dating back to ancient philosophy. Key developments in the 20th century, particularly the work on [[Turing Machines]], laid the groundwork for the modern field.", |
|
"subsections": [ |
|
{{ |
|
"subheading": "Early developments", |
|
"content": "In the early days, developments were slow and often theoretical..." |
|
}} |
|
] |
|
}} |
|
] |
|
}} |
|
""" |
|
|
|
|
|
# In-memory log of (prompt, response) pairs keyed by topic; reset for a topic on
# each new /wikipedai request and lost when the process restarts.
RAW_LOG = {}

def call_pollinations(prompt: str) -> str:
    """Call Pollinations and return the raw text response (no stripping)."""
    uri = (
        POLLINATIONS_URL
        + requests.utils.requote_uri(prompt)
        + "?token=ZJyDM8G0LiZnNxFf&model=gemini&json=true&seed="
        + str(random.randint(0, 999999))
    )
    r = requests.get(uri, timeout=60)
    r.raise_for_status()
    return r.text


def extract_json(text: str) -> dict:
    """Extract and parse the first JSON object found between the first '{' and the last '}'."""
    start = text.find("{")
    end = text.rfind("}") + 1
    # rfind returns -1 when no '}' exists, so end == 0 signals a missing closing brace.
    if start == -1 or end == 0 or start >= end:
        raise ValueError("No JSON object found in AI response.\n\nRaw (truncated):\n" + text[:2000])
    json_str = text[start:end]
    try:
        return json.loads(json_str)
    except Exception as e:
        raise ValueError(
            f"Failed to parse JSON: {e}\n\nExtracted (truncated):\n{json_str[:2000]}\n\nRaw (truncated):\n{text[:2000]}"
        )


def log_raw(topic: str, prompt: str, response: str):
    """Append a prompt/response pair to RAW_LOG for the topic."""
    RAW_LOG.setdefault(topic, []).append((prompt, response))


def generate_headlines(topic: str) -> dict:
    """Step 1: Get the article structure (TOC)."""
    prompt = HEADLINES_PROMPT.format(topic=topic)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)

    # Fill in defaults so rendering never hits a missing key.
    data.setdefault("title", topic.replace("_", " "))
    data.setdefault("lead", "")
    data.setdefault("sections", [])
    return data


def generate_article_content(topic: str, toc_structure: dict) -> dict:
    """Step 2: Generate all content for the given structure in one call."""
    structure_for_prompt = {
        "sections": [
            {
                "heading": s.get("heading"),
                "subsections": s.get("subsections", []),
            }
            for s in toc_structure.get("sections", [])
        ]
    }
    structure_json = json.dumps(structure_for_prompt, indent=2)

    prompt = ARTICLE_PROMPT.format(topic=topic, structure_json=structure_json)
    resp = call_pollinations(prompt)
    log_raw(topic, prompt, resp)
    data = extract_json(resp)
    return data


def esc(s):
    """HTML-escape a string; return an empty string for non-string values."""
    return html_lib.escape(s) if isinstance(s, str) else ""


def render_page(article: dict, execution_time: Optional[float] = None) -> str:
    """Render the final HTML page from the fully populated article JSON."""
    title = esc(article.get("title", "Untitled"))
    lead = esc(article.get("lead", ""))

    css = """body{font-family:sans-serif;margin:0;background:#f6f6f7;color:#202122}#container{display:flex;min-height:100vh}#left-sidebar{width:18%;padding:1.2em;background:#f6f6f7;border-right:1px solid #a7d7f9;box-sizing:border-box}#main-content{width:82%;padding:1.6em;background:#fff;box-sizing:border-box}header{display:flex;justify-content:space-between;align-items:center;border-bottom:1px solid #a7d7f9;padding-bottom:.6em;margin-bottom:1em}#main-title{font-family:Georgia,serif;font-size:2em;margin:0 0 .2em 0;font-weight:normal}.site-sub{color:#54595d;margin-top:0;font-size:.95em}h2{font-size:1.3em;margin-top:1.2em;border-bottom:1px solid #a2a9b1;padding-bottom:.2em;font-weight:normal}h3{font-size:1.05em;margin-top:.8em}p{line-height:1.6}#toc{background:#f8f9fa;border:1px solid #a2a9b1;padding:1em;margin-bottom:1em;display:inline-block}footer{margin-top:2em;border-top:1px solid #a2a9b1;padding-top:1em;color:#54595d;font-size:.85em}.references ol{padding-left:1.2em}"""

    parts = [
        "<!doctype html><html lang='en'><head><meta charset='utf-8'>",
        f"<title>{title} - Wikipedai</title>",
        "<link rel='icon' href='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai.png'>",
        f"<style>{css}</style></head><body><div id='container'><div id='left-sidebar'>",
        "<div style='text-align:center;margin-bottom:1em;'><a href='/'><img src='https://huggingface.co/spaces/NihalGazi/Wikipedai/resolve/main/wikipedai_logo.png' alt='logo' style='width:90px'></a></div>",
        "<div style='margin-bottom:1em;'><strong>Main menu</strong><ul style='padding-left:1em;'><li><a href='#'>Main page</a></li><li><a href='#'>Contents</a></li><li><a href='#'>Random article</a></li></ul></div></div>",
        "<div id='main-content'><header><div><a href='#'>Article</a> • <a href='#'>Talk</a></div><div><input placeholder='Search' id='search_bar' style='padding:.4em;border:1px solid #a2a9b1'></div></header>",
        f"<main><h1 id='main-title'>{title}</h1><p class='site-sub'>From Wikipedai, the free encyclopedai</p>",
    ]

    if lead:
        parts.append(f"<p><strong>{lead}</strong></p>")

    # Table of contents
    if article.get("sections"):
        parts.append("<div id='toc'><h2>Contents</h2><ul>")
        for i, sec in enumerate(article.get("sections", []), 1):
            parts.append(f"<li><a href='#sec{i}'>{i}. {esc(sec.get('heading',''))}</a></li>")
            if sec.get("subsections"):
                parts.append("<ul>")
                for j, sub in enumerate(sec.get("subsections", []), 1):
                    parts.append(f"<li><a href='#sec{i}_sub{j}'>{i}.{j} {esc(sub.get('subheading',''))}</a></li>")
                parts.append("</ul>")
        parts.append("</ul></div>")

    # Section and subsection bodies
    for i, sec in enumerate(article.get("sections", []), 1):
        parts.append(f"<h2 id='sec{i}'><span class='mw-headline'>{esc(sec.get('heading',''))}</span></h2>")
        if sec.get("content"):
            parts.append(f"<p>{esc(sec.get('content',''))}</p>")
        for j, sub in enumerate(sec.get("subsections", []) or [], 1):
            parts.append(f"<h3 id='sec{i}_sub{j}'><span class='mw-headline'>{esc(sub.get('subheading',''))}</span></h3>")
            if sub.get("content"):
                parts.append(f"<p>{esc(sub.get('content',''))}</p>")

    footer_parts = []
    if article.get("last_edited"):
        footer_parts.append(f"This page was last edited on {esc(article.get('last_edited', ''))}")
    if execution_time is not None:
        footer_parts.append(f"Page generated in {execution_time:.2f} seconds")
    footer_content = " • ".join(footer_parts)

    parts.append(f"</main><footer>{footer_content}</footer>")

    js = """
<script>
document.getElementById('search_bar').addEventListener('keydown', function(event) {
    // Only act when the Enter key is pressed
    if (event.key === 'Enter') {
        event.preventDefault();

        // Get the user's query from the input field; do nothing if it is empty
        const query = document.getElementById('search_bar').value;
        if (!query) {
            return;
        }

        // URI-encode the query to handle special characters safely
        const encodedQuery = encodeURIComponent(query);

        // Construct the article URL and redirect the browser to it
        const apiUrl = `https://nihalgazi-wikipedai.hf.space/wikipedai/${encodedQuery}`;
        window.location.href = apiUrl;
    }
});
</script>
"""
    # Append the script before the closing tags so it stays inside <body>.
    parts.append(js)
    parts.append("</div></div></body></html>")
    return "\n".join(parts)


@app.get("/wikipedai/{topic}", response_class=HTMLResponse) |
|
def wikipedai(topic: str): |
|
start_time = time.time() |
|
RAW_LOG[topic] = [] |
|
|
|
try: |
|
|
|
article_structure = generate_headlines(topic) |
|
|
|
|
|
article_content = generate_article_content(topic, article_structure) |
|
|
|
|
|
|
|
content_sections = article_content.get("sections", []) |
|
for i, section_structure in enumerate(article_structure.get("sections", [])): |
|
if i < len(content_sections): |
|
|
|
section_structure["content"] = content_sections[i].get("content", "[Content not generated]") |
|
|
|
|
|
content_subsections = content_sections[i].get("subsections", []) |
|
for j, sub_structure in enumerate(section_structure.get("subsections", [])): |
|
if j < len(content_subsections): |
|
sub_structure["content"] = content_subsections[j].get("content", "[Content not generated]") |
|
|
|
|
|
elapsed_time = time.time() - start_time |
|
html = render_page(article_structure, execution_time=elapsed_time) |
|
return HTMLResponse(content=html, status_code=200) |
|
|
|
except Exception as e: |
|
|
|
import traceback |
|
error_details = f"Error: {e}\n\nTraceback:\n{traceback.format_exc()}" |
|
return HTMLResponse(content=f"<h1>Error</h1><pre>{html_lib.escape(error_details)}</pre>", status_code=500) |
|
|
|
@app.get("/raw/{topic}", response_class=PlainTextResponse) |
|
def raw(topic: str): |
|
entries = RAW_LOG.get(topic, []) |
|
if not entries: |
|
return PlainTextResponse(f"No raw log found for topic '{topic}'. Try calling /wikipedai/{topic} first.", status_code=404) |
|
|
|
out_lines = [] |
|
for idx, (prompt, resp) in enumerate(entries, start=1): |
|
out_lines.append(f"--- Input [{idx}] ---\n{prompt}\n\n--- AI response [{idx}] ---\n{resp}\n") |
|
return PlainTextResponse("\n".join(out_lines), status_code=200) |
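

# A minimal local entry point: a sketch assuming uvicorn is installed in the environment.
# The port (7860) and localhost URLs below are assumptions for development use, not part
# of the original deployment.
if __name__ == "__main__":
    import uvicorn

    # Serve the app locally, then try for example:
    #   curl http://localhost:7860/wikipedai/Quantum_computing
    #   curl http://localhost:7860/raw/Quantum_computing
    uvicorn.run(app, host="0.0.0.0", port=7860)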