#!/usr/bin/env python
from __future__ import annotations
import os
import random
import sys
import tempfile
# Check critical dependencies before proceeding
try:
    import numpy as np
    import torch
    import gradio as gr
    import imageio
    from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
except ImportError as e:
    print(f"Error: Missing required dependency - {e}")
    print("Please ensure requirements.txt includes: numpy, torch, diffusers, gradio, imageio")
    sys.exit(1)
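# The FFMPEG writer used in to_video() below is typically provided by the imageio-ffmpeg package.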
DESCRIPTION = '''# [ModelScope Text to Video Synthesis](https://modelscope.cn/models/damo/text-to-video-synthesis/summary)
<p>For Colab usage, you can view <a href="https://colab.research.google.com/drive/1uW1ZqswkQ9Z9bp5Nbo5z59cAn7I0hE6R?usp=sharing" style="text-decoration: underline;" target="_blank">this webpage</a>.</p>
<p>This model can only be used for non-commercial purposes. See the <a href="https://huggingface.co/damo-vilab/modelscope-damo-text-to-video-synthesis" style="text-decoration: underline;" target="_blank">model card</a>.</p>'''
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
    DESCRIPTION += f'''\n<p>For faster inference, you may duplicate this space and upgrade to GPU.
<a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true">
<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>'''
MAX_NUM_FRAMES = int(os.getenv('MAX_NUM_FRAMES', '64')) # Reduced from 200 for stability
DEFAULT_NUM_FRAMES = min(MAX_NUM_FRAMES, 16)
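# At DEFAULT_NUM_FRAMES (16) and the 8 fps passed to to_video() below, a generated clip runs 2 seconds.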
# Initialize pipeline with error handling
try:
    pipe = DiffusionPipeline.from_pretrained(
        'damo-vilab/text-to-video-ms-1.7b',
        torch_dtype=torch.float16,
        variant='fp16'
    )
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    # Keep sub-models on the CPU until needed and decode the VAE in slices,
    # both of which reduce peak GPU memory usage.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
except Exception as e:
    print(f"Failed to initialize pipeline: {e}")
    print("This model requires significant GPU memory. Try a smaller model such as 'cerspense/zeroscope_v2_576w' if needed.")
    sys.exit(1)
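# Note: fp16 weights assume a CUDA-capable device; on a CPU-only host you would likely need torch_dtype=torch.float32.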
def to_video(frames: list[np.ndarray], fps: int) -> str:
    """Write a sequence of RGB frames to an MP4 file and return its path."""
    try:
        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
        out_file.close()  # imageio writes to the path; close the handle so it is not leaked
        writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=fps)
        for frame in frames:
            writer.append_data(frame)
        writer.close()
        return out_file.name
    except Exception as e:
        print(f"Video creation failed: {e}")
        raise
def generate(prompt: str, seed: int, num_frames: int, num_inference_steps: int) -> str:
    """Generate a video from a text prompt and return the path to the MP4."""
    if not prompt.strip():
        raise gr.Error("Please enter a valid prompt")
    seed = random.randint(0, 1000000) if seed == -1 else seed
    generator = torch.Generator().manual_seed(seed)
    try:
        frames = pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            num_frames=num_frames,
            generator=generator
        ).frames  # NOTE: newer diffusers releases return batched output; there you would use .frames[0]
        return to_video(frames, 8)
    except torch.cuda.OutOfMemoryError:
        raise gr.Error("Out of GPU memory - try reducing the frame count or use a smaller model")
    except Exception as e:
        raise gr.Error(f"Generation failed: {e}")
examples = [
['An astronaut riding a horse.', 0, 16, 25],
['A panda eating bamboo on a rock.', 0, 16, 25],
['Spiderman is surfing.', 0, 16, 25],
]
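# Build the UI. gr.Box and .style() are Gradio 3.x APIs; both were removed in Gradio 4.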
with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Group():
        with gr.Box():
            with gr.Row(elem_id='prompt-container').style(equal_height=True):
                prompt = gr.Text(
                    label='Prompt',
                    show_label=False,
                    max_lines=1,
                    placeholder='Enter your prompt',
                    elem_id='prompt-text-input'
                )
                run_button = gr.Button('Generate video')
        result = gr.Video(label='Result', show_label=False)
        with gr.Accordion('Advanced options', open=False):
            seed = gr.Slider(
                label='Seed',
                minimum=-1,
                maximum=1000000,
                step=1,
                value=-1,
                info='-1 = random seed each time'
            )
            num_frames = gr.Slider(
                label='Number of frames',
                minimum=16,
                maximum=MAX_NUM_FRAMES,
                step=1,
                value=DEFAULT_NUM_FRAMES,
                info='Higher values require more GPU memory'
            )
            num_inference_steps = gr.Slider(
                label='Inference steps',
                minimum=10,
                maximum=50,
                step=1,
                value=25
            )

    inputs = [prompt, seed, num_frames, num_inference_steps]
    gr.Examples(
        examples=examples,
        inputs=inputs,
        outputs=result,
        fn=generate,
        cache_examples=os.getenv('SYSTEM') == 'spaces'
    )

    prompt.submit(fn=generate, inputs=inputs, outputs=result)
    run_button.click(fn=generate, inputs=inputs, outputs=result)

# Additional UI sections remain unchanged...
demo.queue(max_size=10).launch()