import os
import random
import tempfile

import gradio as gr
import imageio
import numpy as np
import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the text-to-video pipeline once at startup.
pipe = DiffusionPipeline.from_pretrained("stepfun-ai/stepvideo-t2v", torch_dtype=torch_dtype)
pipe = pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max


def infer(prompt, seed, randomize_seed, num_inference_steps):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Use a dedicated Generator so the seed is reproducible without
    # touching the global RNG state.
    generator = torch.Generator(device=device).manual_seed(seed)
    output = pipe(prompt=prompt, num_inference_steps=num_inference_steps, generator=generator)
    frames = output.frames[0]  # list of PIL.Image
    # Convert PIL frames to arrays and write to a unique temp file so
    # concurrent sessions don't overwrite each other's output.
    frames = [np.asarray(frame) for frame in frames]
    video_path = os.path.join(tempfile.mkdtemp(), "video.mp4")
    imageio.mimsave(video_path, frames, fps=8)
    return video_path, seed


examples = [
    "Astronaut dancing on Mars, cinematic lighting",
    "A cat flying through the city on a skateboard",
    "Robot chef cooking in a futuristic kitchen",
]

with gr.Blocks() as demo:
    gr.Markdown("# Text-to-Video with `stepvideo-t2v`")
    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
        run_btn = gr.Button("Generate Video")
    with gr.Row():
        video_output = gr.Video(label="Generated Video")
    with gr.Accordion("Advanced Settings", open=False):
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=25)
    gr.Examples(examples=examples, inputs=[prompt])
    run_btn.click(
        fn=infer,
        inputs=[prompt, seed, randomize_seed, num_inference_steps],
        outputs=[video_output, seed],
    )

if __name__ == "__main__":
    demo.launch()