#!/usr/bin/env python
from __future__ import annotations
import os
import random
import tempfile
import sys
# Check critical dependencies before proceeding
try:
    import numpy as np
    import torch
    import gradio as gr
    import imageio
    from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
except ImportError as e:
    print(f"Error: Missing required dependency - {e}")
    print("Please ensure requirements.txt includes: numpy, torch, diffusers, "
          "transformers, accelerate, gradio, imageio, imageio-ffmpeg")
    sys.exit(1)
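# Assumed environment (not pinned here): torch >= 1.13 (for
# torch.cuda.OutOfMemoryError), diffusers >= 0.15 (text-to-video pipeline),
# Gradio 3.x (gr.Box / Row.style), plus transformers and accelerate (needed
# by enable_model_cpu_offload) and imageio-ffmpeg (MP4 writing).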
DESCRIPTION = '''# [ModelScope Text to Video Synthesis](https://modelscope.cn/models/damo/text-to-video-synthesis/summary)
For Colab usage, you can view this webpage.
This model may be used for non-commercial purposes only; see the model card for details.
'''
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
    DESCRIPTION += f'''\nFor faster inference, you may duplicate this Space and upgrade to a GPU.

'''
MAX_NUM_FRAMES = int(os.getenv('MAX_NUM_FRAMES', '64')) # Reduced from 200 for stability
DEFAULT_NUM_FRAMES = min(MAX_NUM_FRAMES, 16)
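# Example: `MAX_NUM_FRAMES=32 python app.py` (script name hypothetical)
# caps clip length on smaller GPUs.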
# Initialize pipeline with error handling
try:
    pipe = DiffusionPipeline.from_pretrained(
        'damo-vilab/text-to-video-ms-1.7b',
        torch_dtype=torch.float16,
        variant='fp16'
    )
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    # Offload idle submodules to CPU and decode the VAE in slices to keep
    # peak GPU memory low.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
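    # Optional sketch (assumption: the `xformers` package may or may not be
    # installed): memory-efficient attention can further reduce VRAM use.
    # Treat failure as non-fatal and fall back to the default attention.
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception:
        pass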
except Exception as e:
print(f"Failed to initialize pipeline: {e}")
print("This model requires significant GPU memory. Try a smaller model like 'cerspense/zeroscope_v2_576w' if needed.")
sys.exit(1)
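# A minimal sketch (assumption: a CUDA GPU is strongly recommended): warn
# early when none is detected, since fp16 inference on CPU is extremely slow.
if not torch.cuda.is_available():
    print('Warning: no CUDA device detected; generation will be very slow.')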
def to_video(frames: list[np.ndarray], fps: int) -> str:
    """Write frames to a temporary MP4 using imageio's FFMPEG backend."""
    try:
        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
        out_file.close()  # Close our handle; imageio reopens the path itself.
        writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=fps)
        for frame in frames:
            frame = np.asarray(frame)
            # Some diffusers versions return float frames in [0, 1]; the
            # FFMPEG writer expects uint8 RGB.
            if frame.dtype != np.uint8:
                frame = (np.clip(frame, 0.0, 1.0) * 255).round().astype(np.uint8)
            writer.append_data(frame)
        writer.close()
        return out_file.name
    except Exception as e:
        print(f"Video creation failed: {e}")
        raise
def generate(prompt: str, seed: int, num_frames: int, num_inference_steps: int) -> str:
    """Generate a video from a text prompt and return the path to an MP4."""
    if not prompt.strip():
        raise gr.Error("Please enter a valid prompt")
    seed = random.randint(0, 1000000) if seed == -1 else seed
    generator = torch.Generator().manual_seed(seed)
    try:
        frames = pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            num_frames=num_frames,
            generator=generator
        ).frames
        # Depending on the diffusers version, `.frames` may carry a leading
        # batch dimension (batch, frames, H, W, C); take the first video.
        if isinstance(frames, np.ndarray) and frames.ndim == 5:
            frames = frames[0]
        return to_video(list(frames), fps=8)
    except torch.cuda.OutOfMemoryError:
        raise gr.Error("Out of GPU memory - try reducing the frame count or using a smaller model")
    except Exception as e:
        raise gr.Error(f"Generation failed: {e}") from e
examples = [
['An astronaut riding a horse.', 0, 16, 25],
['A panda eating bamboo on a rock.', 0, 16, 25],
['Spiderman is surfing.', 0, 16, 25],
]
with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        # Note: gr.Box() and Row.style() are Gradio 3.x APIs.
        with gr.Box():
            with gr.Row(elem_id='prompt-container').style(equal_height=True):
                prompt = gr.Text(
                    label='Prompt',
                    show_label=False,
                    max_lines=1,
                    placeholder='Enter your prompt',
                    elem_id='prompt-text-input'
                )
                run_button = gr.Button('Generate video')
        result = gr.Video(label='Result', show_label=False)
    with gr.Accordion('Advanced options', open=False):
        seed = gr.Slider(
            label='Seed',
            minimum=-1,
            maximum=1000000,
            step=1,
            value=-1,
            info='-1 = random seed each time'
        )
        num_frames = gr.Slider(
            label='Number of frames',
            minimum=16,
            maximum=MAX_NUM_FRAMES,
            step=1,
            value=DEFAULT_NUM_FRAMES,
            info='Higher values require more GPU memory'
        )
        num_inference_steps = gr.Slider(
            label='Inference steps',
            minimum=10,
            maximum=50,
            step=1,
            value=25
        )
    inputs = [prompt, seed, num_frames, num_inference_steps]
    gr.Examples(
        examples=examples,
        inputs=inputs,
        outputs=result,
        fn=generate,
        cache_examples=os.getenv('SYSTEM') == 'spaces'
    )
    prompt.submit(fn=generate, inputs=inputs, outputs=result)
    run_button.click(fn=generate, inputs=inputs, outputs=result)
# Additional UI sections remain unchanged...
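# queue() processes one request at a time by default in Gradio 3.x;
# max_size=10 turns away new requests once ten jobs are already waiting.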
demo.queue(max_size=10).launch()