# gradio_manim_gemini_app.py - v3
"""Gradio demo | |
============ | |
β third revision β | |
β’ **ΠΡΠ°Π²ΠΈΠ»ΡΠ½Π°Ρ ΡΡΡΡΠΊΡΡΡΠ° history** β ΡΠ΅ΠΏΠ΅ΡΡ `Chatbot` ΠΏΠΎΠ»ΡΡΠ°Π΅Ρ ΡΠΏΠΈΡΠΎΠΊ *ΠΏΠ°Ρ* | |
`(user_text, bot_text)`. Π§Π°Π½ΠΊΠΈ Π±ΠΎΡΠ° Π°ΠΏΠ΄Π΅ΠΉβΡΡΡ Π²ΡΠΎΡΠΎΠΉ ΡΠ»Π΅ΠΌΠ΅Π½Ρ ΠΏΠΎΡΠ»Π΅Π΄Π½Π΅ΠΉ ΠΏΠ°ΡΡ, | |
ΠΏΠΎΡΡΠΎΠΌΡ Β«Π΄ΡΠ±Π»ΠΈΒ» ΠΈ Β«ΡΠΎΠ±ΠΎΡβΡΠ·Π΅ΡΒ» ΠΈΡΡΠ΅Π·Π°ΡΡ. | |
β’ **ΠΡΠΈΠ±ΠΊΠΈ ΡΠ΅Π½Π΄Π΅ΡΠ°** ΠΏΡΠ±Π»ΠΈΠΊΡΡΡΡΡ *ΠΊΠ°ΠΊ ΠΏΠΎΠ»ΡΠ·ΠΎΠ²Π°ΡΠ΅Π»ΡΡΠΊΠΎΠ΅ ΡΠΎΠΎΠ±ΡΠ΅Π½ΠΈΠ΅* ΠΈ Π½Π΅ΠΌΠ΅Π΄Π»Π΅Π½Π½ΠΎ | |
ΠΎΡΠΏΡΠ°Π²Π»ΡΡΡΡΡ Π² Gemini; ΠΌΠΎΠ΄Π΅Π»Ρ ΠΎΡΠ²Π΅ΡΠ°Π΅Ρ, ΠΌΡ ΡΠ½ΠΎΠ²Π° ΠΏΡΡΠ°Π΅ΠΌΡΡ ΡΠ³Π΅Π½Π΅ΡΠΈΡΠΎΠ²Π°ΡΡ ΠΊΠΎΠ΄ β | |
ΠΏΠΎΠ»Π½ΠΎΡΡΡΡ Π°Π²ΡΠΎΠΌΠ°ΡΠΈΡΠ΅ΡΠΊΠΈΠΉ ΡΠΈΠΊΠ», ΠΊΠ°ΠΊ Π² Π²Π°ΡΠ΅ΠΌ CLIβΡΠΊΡΠΈΠΏΡΠ΅. | |
β’ Π£ΠΏΡΠ°Π²Π»Π΅Π½ΠΈΠ΅ ΡΠΎΡΡΠΎΡΠ½ΠΈΠ΅ΠΌ ΡΠ²Π΅Π΄Π΅Π½ΠΎ ΠΊ ΡΡΡΠΊΠΈΠΌ ΡΡΠ°ΠΏΠ°ΠΌ: `await_task`, `coding_loop`, | |
`await_feedback`, `finished`. | |
β’ ΠΠΎΡΠ»Π΅ ΠΊΠ°ΠΆΠ΄ΠΎΠ³ΠΎ ΡΠ΅Π½Π΄Π΅ΡΠ° ΠΏΠΎΠ»ΡΠ·ΠΎΠ²Π°ΡΠ΅Π»Ρ ΠΌΠΎΠΆΠ΅Ρ Π΄Π°ΡΡ Π΄ΠΎΠΏΠΎΠ»Π½ΠΈΡΠ΅Π»ΡΠ½ΡΠ΅ ΡΠΊΠ°Π·Π°Π½ΠΈΡ β | |
Π²ΠΈΠ΄Π΅ΠΎ ΠΎΡΠΏΡΠ°Π²Π»ΡΠ΅ΡΡΡ Π² Gemini ΠΈ ΠΊΠΎΠ΄ Π³Π΅Π½Π΅ΡΠΈΡΡΠ΅ΡΡΡ Π·Π°Π½ΠΎΠ²ΠΎ Ρ ΡΡΡΡΠΎΠΌ Π·Π°ΠΌΠ΅ΡΠ°Π½ΠΈΠΉ. | |
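
For illustration, the history object the `Chatbot` receives looks like this
(each streamed chunk extends the bot half of the last pair):

```python
history = [
    ("Explain how a CNN works", "Here is a scenario: ..."),  # finished turn
    ("Make the labels bigger",  "Updating the scene"),       # bot half still growing
]
```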

Launch:
```bash
pip install --upgrade gradio google-genai manim_video_generator manim-ml
export GEMINI_API_KEY="YOUR_KEY"
python gradio_manim_gemini_app.py
```
"""
from __future__ import annotations

import asyncio
import os
import re
import traceback
from pathlib import Path
from typing import List, Tuple

import gradio as gr
from google import genai
from google.genai.chats import AsyncChat
from google.genai.types import GenerateContentConfig, ThinkingConfig, UploadFileConfig
from manim_video_generator.video_executor import VideoExecutor  # type: ignore

from prompts import SYSTEM_PROMPT_SCENARIO_GENERATOR, SYSTEM_PROMPT_CODEGEN
# ──────────────────────────────── Config ─────────────────────────────────────

API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise EnvironmentError("GEMINI_API_KEY env variable not set.")

client = genai.Client(api_key=API_KEY)
MODEL = "gemini-2.5-flash-preview-05-20"
video_executor = VideoExecutor()
# ─────────────────────── Helpers to work with Chatbot ────────────────────────

def add_user_msg(history: List[Tuple[str, str]], text: str):
    """Append a new (user, "") pair."""
    history.append((text, ""))


def append_bot_chunk(history: List[Tuple[str, str]], chunk: str):
    """Append a chunk to the bot half of the last pair."""
    user, bot = history[-1]
    history[-1] = (user, bot + chunk)
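
# Illustration (not executed): how the two helpers above mutate `history`.
#   h: List[Tuple[str, str]] = []
#   add_user_msg(h, "hi")        # h == [("hi", "")]
#   append_bot_chunk(h, "Hel")   # h == [("hi", "Hel")]
#   append_bot_chunk(h, "lo")    # h == [("hi", "Hello")]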
class StreamPart:
    """Base wrapper for one streamed text fragment."""

    def __init__(self, text: str):
        self.text = text


class ThinkingStreamPart(StreamPart):
    """Fragment of the model's thinking stream."""


class TextStreamPart(StreamPart):
    """Fragment of the model's final answer."""
async def stream_parts(chat, prompt):
    """Stream Gemini output, yielding thought and answer fragments separately."""
    cfg = GenerateContentConfig(thinking_config=ThinkingConfig(include_thoughts=True))
    async for chunk in await chat.send_message_stream(prompt, config=cfg):
        if not chunk.candidates:
            continue
        cand = chunk.candidates[0]
        if not (cand.content and cand.content.parts):
            continue
        for part in cand.content.parts:
            if not part.text:
                continue
            if part.thought:
                yield ThinkingStreamPart(part.text)
            else:
                yield TextStreamPart(part.text)
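
# Usage sketch (illustrative only):
#   async for part in stream_parts(chat, "explain CNNs"):
#       prefix = "[thinking] " if isinstance(part, ThinkingStreamPart) else ""
#       print(prefix + part.text, end="")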
def extract_python(md: str) -> str:
    """Return the body of the first ```python``` fence in *md*."""
    m = re.search(r"```python(.*?)```", md, re.S)
    if not m:
        raise ValueError("No ```python``` block found in model output.")
    return m.group(1).strip()
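
# Illustration: extract_python("intro\n```python\nprint(1)\n```") == "print(1)";
# model output without a fenced block raises ValueError, which feeds the retry
# loop in coding_cycle below.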
async def coding_cycle(state: "Session", history: List[Tuple[str, str]], prompt):
    """Generate code, render the video and return once rendering succeeds."""
    while True:
        # Stream the model's answer into the last history pair.
        async for chunk in stream_parts(state.chat, prompt):
            append_bot_chunk(history, chunk.text)
            yield history, state, state.last_video
            await asyncio.sleep(0)
        full_answer = history[-1][1]

        try:
            py_code = extract_python(full_answer)
        except ValueError as e:
            err_msg = f"Error: {e}. Please wrap the code in a ```python``` fence."
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        try:
            append_bot_chunk(history, "\n⏳ Rendering... this can take a few minutes.")
            yield history, state, state.last_video
            await asyncio.sleep(0)
            video_path = video_executor.execute_manim_code(py_code)
            state.last_video = video_path
        except Exception as e:
            tb = traceback.format_exc(limit=10)
            err_msg = (
                f"Error, your code is not valid: {e}. Traceback: {tb}. "
                "Please fix this error and regenerate the code."
            )
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        append_bot_chunk(history, "\n🎞️ Rendering done! Feel free to request changes or press **Next Step** to end.")
        state.phase = "await_feedback"
        yield history, state, state.last_video
        return
# ────────────────────────── Session state ────────────────────────────────────

class Session(dict):
    """Per-user conversation state kept in gr.State."""

    phase: str  # await_task | coding_loop | await_feedback | finished
    chat: AsyncChat | None
    last_video: Path | None

    def __init__(self):
        super().__init__(phase="await_task", chat=None, last_video=None)
        self.phase = "await_task"
        self.chat = None
        self.last_video = None
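
# Phase transitions implemented by the handlers below (summary):
#   await_task -----Next Step / "c"------> coding_loop
#   coding_loop ----render succeeded-----> await_feedback
#   await_feedback -feedback message-----> coding_loop (video re-uploaded to Gemini)
#   await_feedback -"finish" / Next Step-> finished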
# ──────────────────────── Main chat handler ──────────────────────────────────

async def chat_handler(user_msg: str, history: List[Tuple[str, str]], state: Session):
    """Route a user message according to the current session phase."""
    history = history or []

    # 0. Always reflect the user input first.
    add_user_msg(history, user_msg)
    yield history, state, state.last_video
    # Bootstrap the chat on the very first user request.
    if state.phase == "await_task":
        if not state.chat:
            # First time: create the chat and generate a scenario.
            state.chat = client.aio.chats.create(model=MODEL)
            scenario_prompt = f"{SYSTEM_PROMPT_SCENARIO_GENERATOR}\n\n{user_msg}"
            async for txt in stream_parts(state.chat, scenario_prompt):
                append_bot_chunk(history, txt.text)
                yield history, state, state.last_video
                await asyncio.sleep(0)
            append_bot_chunk(history, "\n\n*(press **Next Step** to proceed to code generation)*")
            yield history, state, state.last_video
            return
        else:
            # Chat exists: check whether the user wants to proceed or modify the scenario.
            if user_msg.strip().lower() in {"c", "continue", "с"}:
                # Legacy behaviour: typed command to proceed.
                state.phase = "coding_loop"
            else:
                # The user wants to discuss or modify the scenario.
                async for chunk in stream_parts(state.chat, user_msg):
                    append_bot_chunk(history, chunk.text)
                    yield history, state, state.last_video
                    await asyncio.sleep(0)
                append_bot_chunk(history, "\n\n*(press **Next Step** when ready to proceed to code generation)*")
                yield history, state, state.last_video
                return
    # Later phases require the chat object.
    if not state.chat:
        raise ValueError("Chat not found")
    # ── Coding loop ──────────────────────────────────────────────────────────
    if state.phase == "coding_loop":
        prompt = "Thanks. That is a good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    # ── Awaiting user feedback after rendering ───────────────────────────────
    if state.phase == "await_feedback":
        if user_msg.strip().lower() in {"finish", "done", "f"}:
            state.phase = "finished"
            append_bot_chunk(history, "Session complete. Refresh the page to start over.")
            yield history, state, state.last_video
            return
        # Upload the last rendered video so Gemini can review its own output.
        file_ref = client.files.upload(file=state.last_video, config=UploadFileConfig(display_name=state.last_video.name))
        while file_ref.state and file_ref.state.name == "PROCESSING":
            await asyncio.sleep(3)
            if file_ref.name:
                file_ref = client.files.get(name=file_ref.name)
        if file_ref.state and file_ref.state.name == "FAILED":
            raise RuntimeError("Gemini failed to process upload")
        prompt = [file_ref, f"{user_msg}\n\n{SYSTEM_PROMPT_CODEGEN}"]
        state.phase = "coding_loop"
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    # ── Finished phase ───────────────────────────────────────────────────────
    if state.phase == "finished":
        append_bot_chunk(history, "Session complete. Refresh the page to start over.")
        yield history, state, state.last_video
async def next_step_handler(history: List[Tuple[str, str]], state: Session):
    """Advance the conversation without typed control words."""
    history = history or []
    if state.phase == "await_task" and state.chat:
        state.phase = "coding_loop"
        prompt = "Thanks. That is a good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    if state.phase == "await_feedback":
        state.phase = "finished"
        append_bot_chunk(history, "Session complete. Refresh the page to start over.")
        yield history, state, state.last_video
        return
    yield history, state, state.last_video
# ─────────────────────────────── UI ──────────────────────────────────────────

def build_app():
    with gr.Blocks(title="Gemini→Manim Video Creator") as demo:
        gr.Markdown("# 🎬 Gemini→Manim Video Creator\nCreate an explanatory animation from a single prompt.")
        history = gr.Chatbot(height=850)
        session = gr.State(Session())
        with gr.Row():
            txt = gr.Textbox(placeholder="Describe the concept…", scale=4)
            btn = gr.Button("Send", variant="primary")
            next_btn = gr.Button("Next Step")
        vid = gr.Video(label="Rendered video", interactive=False)

        def get_vid(state: Session):
            # Helper to read the last rendered video from state
            # (not wired to any event below; the handlers stream the path directly).
            return state.last_video if state.last_video else None

        btn.click(chat_handler, [txt, history, session], [history, session, vid]) \
            .then(lambda: "", None, txt)
        next_btn.click(next_step_handler, [history, session], [history, session, vid])
    return demo
if __name__ == "__main__":
    build_app().launch()