import os
import random
import tempfile

import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the text-to-video pipeline once at startup.
pipe = DiffusionPipeline.from_pretrained("stepfun-ai/stepvideo-t2v", torch_dtype=torch_dtype)
pipe = pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max


def infer(prompt, seed, randomize_seed, num_inference_steps):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Use a dedicated Generator so the seed is reproducible without
    # touching the global RNG state.
    generator = torch.Generator(device=device).manual_seed(seed)
    output = pipe(prompt=prompt, num_inference_steps=num_inference_steps, generator=generator)
    frames = output.frames[0]  # list of PIL.Image
    # Convert PIL frames to arrays and write to a unique temp file so
    # concurrent sessions don't overwrite each other's output.
    frames = [np.asarray(frame) for frame in frames]
    video_path = os.path.join(tempfile.mkdtemp(), "video.mp4")
    imageio.mimsave(video_path, frames, fps=8)
    return video_path, seed


examples = [
    "Astronaut dancing on Mars, cinematic lighting",
    "A cat flying through the city on a skateboard",
    "Robot chef cooking in a futuristic kitchen",
]

with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video with `stepvideo-t2v`")
    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
        run_btn = gr.Button("Generate Video")
    with gr.Row():
        video_output = gr.Video(label="Generated Video")
    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=25)
    gr.Examples(examples=examples, inputs=[prompt])
    run_btn.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, num_inference_steps],
        outputs=[video_output, seed],
    )

if __name__ == "__main__":
    demo.launch()