# gradio_manim_gemini_app.py – **v3**
"""Gradio demo
============
β€” third revision β€”
β€’ **ΠŸΡ€Π°Π²ΠΈΠ»ΡŒΠ½Π°Ρ структура history** β€” Ρ‚Π΅ΠΏΠ΅Ρ€ΡŒ `Chatbot` ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅Ρ‚ список *ΠΏΠ°Ρ€*
`(user_text, bot_text)`. Π§Π°Π½ΠΊΠΈ Π±ΠΎΡ‚Π° апдСй‑тят Π²Ρ‚ΠΎΡ€ΠΎΠΉ элСмСнт послСднСй ΠΏΠ°Ρ€Ρ‹,
поэтому Β«Π΄ΡƒΠ±Π»ΠΈΒ» ΠΈ Β«Ρ€ΠΎΠ±ΠΎΡ‚β€‘ΡŽΠ·Π΅Ρ€Β» ΠΈΡΡ‡Π΅Π·Π°ΡŽΡ‚.
β€’ **Ошибки Ρ€Π΅Π½Π΄Π΅Ρ€Π°** ΠΏΡƒΠ±Π»ΠΈΠΊΡƒΡŽΡ‚ΡΡ *ΠΊΠ°ΠΊ ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒΡΠΊΠΎΠ΅ сообщСниС* ΠΈ Π½Π΅ΠΌΠ΅Π΄Π»Π΅Π½Π½ΠΎ
ΠΎΡ‚ΠΏΡ€Π°Π²Π»ΡΡŽΡ‚ΡΡ Π² Gemini; модСль ΠΎΡ‚Π²Π΅Ρ‡Π°Π΅Ρ‚, ΠΌΡ‹ снова пытаСмся ΡΠ³Π΅Π½Π΅Ρ€ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ ΠΊΠΎΠ΄ β€”
ΠΏΠΎΠ»Π½ΠΎΡΡ‚ΡŒΡŽ автоматичСский Ρ†ΠΈΠΊΠ», ΠΊΠ°ΠΊ Π² вашСм CLI‑скриптС.
β€’ Π£ΠΏΡ€Π°Π²Π»Π΅Π½ΠΈΠ΅ состояниСм свСдСно ΠΊ Ρ‡Ρ‘Ρ‚ΠΊΠΈΠΌ этапам: `await_task`, `coding_loop`,
`await_feedback`, `finished`.
β€’ ПослС ΠΊΠ°ΠΆΠ΄ΠΎΠ³ΠΎ Ρ€Π΅Π½Π΄Π΅Ρ€Π° ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒ ΠΌΠΎΠΆΠ΅Ρ‚ Π΄Π°Ρ‚ΡŒ Π΄ΠΎΠΏΠΎΠ»Π½ΠΈΡ‚Π΅Π»ΡŒΠ½Ρ‹Π΅ указания β€”
Π²ΠΈΠ΄Π΅ΠΎ отправляСтся Π² Gemini ΠΈ ΠΊΠΎΠ΄ гСнСрируСтся Π·Π°Π½ΠΎΠ²ΠΎ с ΡƒΡ‡Ρ‘Ρ‚ΠΎΠΌ Π·Π°ΠΌΠ΅Ρ‡Π°Π½ΠΈΠΉ.
Запуск:
```bash
pip install --upgrade gradio google-genai manim_video_generator manim-ml
export GEMINI_API_KEY="YOUR_KEY"
python gradio_manim_gemini_app.py
```
"""
from __future__ import annotations
import asyncio
import os
import re
import traceback
from pathlib import Path
from typing import List, Tuple
import gradio as gr
from google import genai
from google.genai.chats import AsyncChat
from google.genai.types import GenerateContentConfig, ThinkingConfig, UploadFileConfig
from manim_video_generator.video_executor import VideoExecutor # type: ignore
from prompts import SYSTEM_PROMPT_SCENARIO_GENERATOR, SYSTEM_PROMPT_CODEGEN
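# SYSTEM_PROMPT_SCENARIO_GENERATOR and SYSTEM_PROMPT_CODEGEN come from the
# repo-local prompts.py module; VideoExecutor (manim_video_generator) executes
# generated Manim code and returns the path of the rendered video.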
# ──────────────────────────────── Config ─────────────────────────────────────
API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise EnvironmentError("GEMINI_API_KEY env variable not set.")
client = genai.Client(api_key=API_KEY)
MODEL = "gemini-2.5-flash-preview-05-20"
video_executor = VideoExecutor()
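# NOTE: the model id above is a dated preview snapshot; Google retires preview
# models periodically, so swap in a current Gemini model name if requests start
# failing.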
# ─────────────────────── Helpers to work with Chatbot ────────────────────────
def add_user_msg(history: List[Tuple[str, str]], text: str):
    """Append a new (user, "") pair."""
    history.append((text, ""))


def append_bot_chunk(history: List[Tuple[str, str]], chunk: str):
    """Append a chunk to the bot half of the last pair."""
    user, bot = history[-1]
    history[-1] = (user, bot + chunk)

class StreamPart:
    def __init__(self, text: str):
        self.text = text


class ThinkingStreamPart(StreamPart): pass


class TextStreamPart(StreamPart): pass
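
# With ThinkingConfig(include_thoughts=True), the Gemini stream interleaves the
# model's reasoning ("thought" parts) with the final answer. The two wrapper
# classes above let callers tell them apart, although the handlers below
# currently render both kinds of chunk the same way.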

async def stream_parts(chat, prompt):
    cfg = GenerateContentConfig(thinking_config=ThinkingConfig(include_thoughts=True))
    async for chunk in await chat.send_message_stream(prompt, config=cfg):
        if not chunk.candidates:
            continue
        cand = chunk.candidates[0]
        if not (cand.content and cand.content.parts):
            continue
        for part in cand.content.parts:
            if not part.text:
                continue
            if part.thought:
                yield ThinkingStreamPart(part.text)
            else:
                yield TextStreamPart(part.text)

def extract_python(md: str) -> str:
    m = re.search(r"```python(.*?)```", md, re.S)
    if not m:
        raise ValueError("No ```python``` block found in model output.")
    return m.group(1).strip()
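
# For example, extract_python("intro\n```python\nprint('hi')\n```") returns
# "print('hi')"; anything outside the first fenced block is discarded.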

async def coding_cycle(state: "Session", history: List[Tuple[str, str]], prompt):
    """Generate code, render the video and return once rendering succeeds."""
    while True:
        async for chunk in stream_parts(state.chat, prompt):
            append_bot_chunk(history, chunk.text)
            yield history, state, state.last_video
            await asyncio.sleep(0)
        full_answer = history[-1][1]
        try:
            py_code = extract_python(full_answer)
        except ValueError as e:
            err_msg = f"Error: {e}. Please wrap the code in a ```python``` fence."
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue
        try:
            append_bot_chunk(history, "\n⏳ Rendering... This can take a few minutes.")
            yield history, state, state.last_video
            await asyncio.sleep(0)
            video_path = video_executor.execute_manim_code(py_code)
            state.last_video = video_path
        except Exception as e:
            tb = traceback.format_exc(limit=10)
            err_msg = (
                f"Error, your code is not valid: {e}. Traceback: {tb}. "
                "Please fix this error and regenerate the code."
            )
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue
        append_bot_chunk(history, "\n🎞️ Rendering done! Feel free to request changes or press **Next Step** to end.")
        state.phase = "await_feedback"
        yield history, state, state.last_video
        return
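
# coding_cycle is the self-healing loop described in the module docstring:
# every failure (a missing ```python``` fence or a Manim exception) is fed back
# to Gemini as a user message and generation is retried until a render
# succeeds, at which point the session moves to the await_feedback phase.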
# ────────────────────────── Session state ────────────────────────────────────
class Session(dict):
    phase: str  # await_task | coding_loop | await_feedback | finished
    chat: AsyncChat | None
    last_video: Path | None

    def __init__(self):
        super().__init__(phase="await_task", chat=None, last_video=None)
        self.phase = "await_task"
        self.chat = None
        self.last_video = None
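
# Note: gr.State deep-copies its default value for every browser session, so
# mutating the Session object inside the handlers does not leak state between
# users.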
# ──────────────────────── Main chat handler ──────────────────────────────────
async def chat_handler(user_msg: str, history: List[Tuple[str, str]], state: Session):
    history = history or []
    # 0. Always reflect the user input first
    add_user_msg(history, user_msg)
    yield history, state, state.last_video
    # Bootstrap the chat on the very first user request
    if state.phase == "await_task":
        if not state.chat:
            # First time: create the chat and generate a scenario
            state.chat = client.aio.chats.create(model=MODEL)
            scenario_prompt = f"{SYSTEM_PROMPT_SCENARIO_GENERATOR}\n\n{user_msg}"
            async for txt in stream_parts(state.chat, scenario_prompt):
                append_bot_chunk(history, txt.text)
                yield history, state, state.last_video
                await asyncio.sleep(0)
            append_bot_chunk(history, "\n\n*(press **Next Step** to proceed to code generation)*")
            yield history, state, state.last_video
            return
        # The chat exists: check whether the user wants to proceed or modify the scenario
        if user_msg.strip().lower() in {"c", "continue", "с"}:
            # Legacy behaviour: a typed command to proceed (the last entry is the Cyrillic "с")
            state.phase = "coding_loop"
        else:
            # The user wants to discuss/modify the scenario
            async for chunk in stream_parts(state.chat, user_msg):
                append_bot_chunk(history, chunk.text)
                yield history, state, state.last_video
                await asyncio.sleep(0)
            append_bot_chunk(history, "\n\n*(press **Next Step** when ready to proceed to code generation)*")
            yield history, state, state.last_video
            return
    # Later phases require a chat object
    if not state.chat:
        raise ValueError("Chat not found")
    # ── Coding loop ─────────────────────────────────────────────────────────
    if state.phase == "coding_loop":
        prompt = "Thanks. It is a good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    # ── Awaiting user feedback after rendering ──────────────────────────────
    if state.phase == "await_feedback":
        if user_msg.strip().lower() in {"finish", "done", "f"}:
            state.phase = "finished"
            append_bot_chunk(history, "Session complete. Refresh the page to start over.")
            yield history, state, state.last_video
            return
        file_ref = client.files.upload(file=state.last_video, config=UploadFileConfig(display_name=state.last_video.name))
        while file_ref.state and file_ref.state.name == "PROCESSING":
            await asyncio.sleep(3)
            if file_ref.name:
                file_ref = client.files.get(name=file_ref.name)
        if file_ref.state and file_ref.state.name == "FAILED":
            raise RuntimeError("Gemini failed to process the uploaded video")
        prompt = [file_ref, f"{user_msg}\n\n{SYSTEM_PROMPT_CODEGEN}"]
        state.phase = "coding_loop"
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
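    # (The Files API upload above is synchronous and blocks the event loop while
    # Gemini processes the video; if that becomes a problem, the async client
    # exposes the same call as client.aio.files.upload.)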
    # ── Finished phase ───────────────────────────────────────────────────────
    if state.phase == "finished":
        append_bot_chunk(history, "Session complete. Refresh the page to start over.")
        yield history, state, state.last_video

async def next_step_handler(history: List[Tuple[str, str]], state: Session):
    """Advance the conversation without typing control words."""
    history = history or []
    if state.phase == "await_task" and state.chat:
        state.phase = "coding_loop"
        prompt = "Thanks. It is a good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    if state.phase == "await_feedback":
        state.phase = "finished"
        append_bot_chunk(history, "Session complete. Refresh the page to start over.")
        yield history, state, state.last_video
        return
    yield history, state, state.last_video
# ─────────────────────────────── UI ──────────────────────────────────────────
def build_app():
    with gr.Blocks(title="Gemini-Manim Video Creator") as demo:
        gr.Markdown("# 🎬 Gemini-Manim Video Creator\nCreate an explanatory animation from a single prompt.")
        history = gr.Chatbot(height=850)
        session = gr.State(Session())
        with gr.Row():
            txt = gr.Textbox(placeholder="Describe the concept…", scale=4)
            btn = gr.Button("Send", variant="primary")
            next_btn = gr.Button("Next Step")
        vid = gr.Video(label="Rendered video", interactive=False)
        btn.click(chat_handler, [txt, history, session], [history, session, vid]) \
            .then(lambda: "", None, txt)
        next_btn.click(next_step_handler, [history, session], [history, session, vid])
    return demo
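
# The handlers are async generators, so their incremental yields need Gradio's
# queue; recent Gradio releases enable it by default, while older 3.x versions
# need an explicit build_app().queue() before launch().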

if __name__ == "__main__":
    build_app().launch()