"""Gradio demo for LTX-Video: text-to-video and image-to-video generation."""

import gradio as gr
import torch
import spaces
import numpy as np
import random
import os
import yaml
from pathlib import Path
import imageio
import tempfile
from PIL import Image
from huggingface_hub import hf_hub_download
import shutil
from diffusers import LTXImageToVideoPipeline  # or LTXConditionPipeline depending on version
from diffusers import LTXPipeline

# -------------------------
# 📦 Download and load the model
# -------------------------
MODEL_ID = "Lightricks/LTX-Video"
CHECKPOINT_FILE = "ltxv-2b-0.9.6-distilled.safetensors"
OUTPUT_FPS = 25
MAX_SEED = 2**32 - 1

# NOTE(review): this checkpoint is downloaded but never handed to the pipeline
# below, so the weights actually used are whatever `from_pretrained(MODEL_ID)`
# resolves to. TODO: confirm the intended loader for the distilled checkpoint.
local_ckpt = hf_hub_download(
    repo_id=MODEL_ID,
    filename=CHECKPOINT_FILE,
    cache_dir="./models",
)

# `safety_checker` is not a component of the LTX pipelines, so it is not passed.
pipe = LTXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    revision="main",
    torch_dtype=torch.bfloat16,
).to("cuda")

# The image-to-video pipeline requires an `image`; prompt-only generation needs
# the plain text-to-video pipeline. Built from `pipe` to reuse its components.
txt_pipe = LTXPipeline.from_pipe(pipe)


# -------------------------
# 🔧 Generation functions
# -------------------------
def _resolve_seed(seed):
    """Return an integer seed; a missing or zero seed selects a random one."""
    # gr.Number delivers floats, while torch's manual_seed requires an int.
    value = int(seed or 0)
    return value if value else random.randint(0, MAX_SEED)


def _frame_to_uint8(frame):
    """Convert one frame (PIL image or array) to a uint8 numpy array."""
    pixels = np.asarray(frame)
    if np.issubdtype(pixels.dtype, np.floating):
        # Float frames are assumed to be normalised to [0, 1].
        pixels = (np.clip(pixels, 0.0, 1.0) * 255).round().astype(np.uint8)
    return pixels


def _save_video(frames, fps=OUTPUT_FPS):
    """Write *frames* to a fresh temporary .mp4 file and return its path."""
    # Close the handle right away: only the reserved path is needed, and an
    # open handle would otherwise leak on every request.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as handle:
        path = handle.name
    imageio.mimwrite(path, [_frame_to_uint8(f) for f in frames], fps=fps)
    return path


def txt2vid(prompt, height, width, num_frames, steps, seed=None):
    """Generate a video from a text prompt and return the path to the .mp4.

    Args:
        prompt: Text description of the desired video.
        height: Output height in pixels.
        width: Output width in pixels.
        num_frames: Number of frames to generate.
        steps: Number of inference steps.
        seed: Optional seed; ``None`` or ``0`` picks a random one.
    """
    generator = torch.Generator(device="cuda").manual_seed(_resolve_seed(seed))
    out = txt_pipe(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_frames=int(num_frames),
        num_inference_steps=int(steps),
        generator=generator,
    )
    # LTX pipelines expose the generated video under `.frames`.
    return _save_video(out.frames[0])


def img2vid(image, prompt, height, width, num_frames, steps, seed=None):
    """Generate a video conditioned on an image and return the path to the .mp4.

    Args:
        image: Conditioning image as a numpy array (as delivered by gr.Image).
        prompt: Text description of the desired video.
        height: Output height in pixels.
        width: Output width in pixels.
        num_frames: Number of frames to generate.
        steps: Number of inference steps.
        seed: Optional seed; ``None`` or ``0`` picks a random one.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    first_frame = Image.fromarray(image).convert("RGB")
    generator = torch.Generator(device="cuda").manual_seed(_resolve_seed(seed))
    out = pipe(
        image=first_frame,
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_frames=int(num_frames),
        num_inference_steps=int(steps),
        generator=generator,
    )
    return _save_video(out.frames[0])


# -------------------------
# 🎨 Gradio interface
# -------------------------
css = """body { background-color:#111; color:#eee } .gradio-container { max-width:800px; }"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# LTX‑Video 2B Distilled (Gratuito)")

    with gr.Tab("Text → Video"):
        t_prompt = gr.Textbox(label="Prompt", value="A serene landscape at sunrise")
        t_h = gr.Slider(128, 720, value=512, step=32, label="Height")
        t_w = gr.Slider(128, 1280, value=768, step=32, label="Width")
        t_f = gr.Slider(9, 257, value=65, step=8, label="Num Frames")
        t_s = gr.Slider(4, 16, value=8, step=1, label="Steps")
        t_seed = gr.Number(label="Seed (opcional)", value=0)
        t_btn = gr.Button("Generate")
        t_out = gr.Video()
        t_btn.click(
            fn=txt2vid,
            inputs=[t_prompt, t_h, t_w, t_f, t_s, t_seed],
            outputs=t_out,
        )

    with gr.Tab("Image → Video"):
        i_img = gr.Image(type="numpy")
        i_prompt = gr.Textbox(label="Prompt", value="A cute fox in the snow")
        i_h = gr.Slider(128, 720, value=512, step=32, label="Height")
        i_w = gr.Slider(128, 1280, value=768, step=32, label="Width")
        i_f = gr.Slider(9, 257, value=65, step=8, label="Num Frames")
        i_s = gr.Slider(4, 16, value=8, step=1, label="Steps")
        i_seed = gr.Number(label="Seed (opcional)", value=0)
        i_btn = gr.Button("Generate")
        i_out = gr.Video()
        i_btn.click(
            fn=img2vid,
            inputs=[i_img, i_prompt, i_h, i_w, i_f, i_s, i_seed],
            outputs=i_out,
        )

    gr.Markdown("**Modelo:** ltxv‑2b‑0.9.6‑distilled • resolución múltiplo de 32, frames múltiplo de 8+1")

demo.launch()