Spaces: Running on Zero
add lora gallery
app.py
CHANGED
@@ -1,38 +1,269 @@
1 |
import gradio as gr
|
2 |
-
import
|
|
|
3 |
import torch
|
|
|
|
|
4 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
5 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
6 |
-
from diffusers.utils import export_to_video, load_video, load_image
|
|
|
|
|
7 |
import random
|
8 |
import numpy as np
|
9 |
import imageio
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
pipe.vae.enable_tiling()
|
17 |
|
18 |
-
MAX_SEED = np.iinfo(np.int32).max
|
|
|
|
|
19 |
MAX_IMAGE_SIZE = 1280
|
20 |
MAX_NUM_FRAMES = 257
|
21 |
-
FPS = 30.0
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
|
26 |
def round_to_nearest_resolution_acceptable_by_vae(height, width):
|
27 |
-
print("before rounding",height, width)
|
28 |
height = height - (height % pipe.vae_spatial_compression_ratio)
|
29 |
width = width - (width % pipe.vae_spatial_compression_ratio)
|
30 |
-
print("after rounding",height, width)
|
31 |
return height, width
|
32 |
|
33 |
def handle_image_upload_for_dims(image_filepath, current_h, current_w):
|
34 |
-
if not image_filepath:
|
35 |
-
# Keep current slider values if image is cleared or no input
|
36 |
return gr.update(value=current_h), gr.update(value=current_w)
|
37 |
try:
|
38 |
img = Image.open(image_filepath)
|
@@ -41,15 +272,13 @@ def handle_image_upload_for_dims(image_filepath, current_h, current_w):
|
|
41 |
return gr.update(value=new_h), gr.update(value=new_w)
|
42 |
except Exception as e:
|
43 |
print(f"Error processing image for dimension update: {e}")
|
44 |
-
# Keep current slider values on error
|
45 |
return gr.update(value=current_h), gr.update(value=current_w)
|
46 |
|
47 |
def handle_video_upload_for_dims(video_filepath, current_h, current_w):
|
48 |
-
if not video_filepath:
|
49 |
return gr.update(value=current_h), gr.update(value=current_w)
|
50 |
try:
|
51 |
-
|
52 |
-
video_filepath_str = str(video_filepath)
|
53 |
if not os.path.exists(video_filepath_str):
|
54 |
print(f"Video file path does not exist for dimension update: {video_filepath_str}")
|
55 |
return gr.update(value=current_h), gr.update(value=current_w)
|
@@ -60,56 +289,38 @@ def handle_video_upload_for_dims(video_filepath, current_h, current_w):
|
|
60 |
if 'size' in meta:
|
61 |
orig_w, orig_h = meta['size']
|
62 |
else:
|
63 |
-
# Fallback: read first frame if 'size' not in metadata
|
64 |
try:
|
65 |
first_frame = reader.get_data(0)
|
66 |
-
# Shape is (h, w, c) for frames
|
67 |
orig_h, orig_w = first_frame.shape[0], first_frame.shape[1]
|
68 |
except Exception as e_frame:
|
69 |
print(f"Could not get video size from metadata or first frame: {e_frame}")
|
70 |
return gr.update(value=current_h), gr.update(value=current_w)
|
71 |
|
72 |
-
if orig_w == -1 or orig_h == -1:
|
73 |
print(f"Could not determine dimensions for video: {video_filepath_str}")
|
74 |
return gr.update(value=current_h), gr.update(value=current_w)
|
75 |
|
76 |
new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
|
77 |
return gr.update(value=new_h), gr.update(value=new_w)
|
78 |
except Exception as e:
|
79 |
-
# Log type of video_filepath for debugging if it's not a path-like string
|
80 |
print(f"Error processing video for dimension update: {e} (Path: {video_filepath}, Type: {type(video_filepath)})")
|
81 |
return gr.update(value=current_h), gr.update(value=current_w)
|
82 |
|
83 |
-
def update_task_image():
|
84 |
-
|
|
|
85 |
|
86 |
-
def
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
def get_duration(prompt,
|
93 |
-
negative_prompt,
|
94 |
-
image,
|
95 |
-
video,
|
96 |
-
height,
|
97 |
-
width,
|
98 |
-
mode,
|
99 |
-
steps,
|
100 |
-
num_frames,
|
101 |
-
frames_to_use,
|
102 |
-
seed,
|
103 |
-
randomize_seed,
|
104 |
-
guidance_scale,
|
105 |
-
duration_input,
|
106 |
-
improve_texture, progress):
|
107 |
if duration_input > 7:
|
108 |
return 75
|
109 |
else:
|
110 |
return 60
|
111 |
-
|
112 |
-
@spaces.GPU(duration=get_duration)
|
113 |
def generate(prompt,
|
114 |
negative_prompt,
|
115 |
image,
|
@@ -118,164 +329,202 @@ def generate(prompt,
|
|
118 |
width,
|
119 |
mode,
|
120 |
steps,
|
121 |
-
num_frames
|
122 |
frames_to_use,
|
123 |
seed,
|
124 |
randomize_seed,
|
125 |
guidance_scale,
|
126 |
duration_input,
|
127 |
-
improve_texture=False,
|
128 |
129 |
if randomize_seed:
|
130 |
seed = random.randint(0, MAX_SEED)
|
131 |
|
132 |
-
# calculate number of frames based on the duration input in seconds
|
133 |
target_frames_ideal = duration_input * FPS
|
134 |
target_frames_rounded = round(target_frames_ideal)
|
135 |
-
if target_frames_rounded < 1:
|
136 |
-
target_frames_rounded = 1
|
137 |
|
138 |
n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
|
139 |
actual_num_frames = int(n_val * 8 + 1)
|
140 |
-
|
141 |
actual_num_frames = max(9, actual_num_frames)
|
142 |
-
num_frames = min(MAX_NUM_FRAMES, actual_num_frames)
|
143 |
|
144 |
if mode == "video-to-video" and (video is not None):
|
145 |
-
|
146 |
-
|
147 |
-
width, height =
|
148 |
-
steps = 4
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
if
|
158 |
-
condition1 = LTXVideoCondition(video=
|
159 |
else:
|
160 |
condition1 = None
|
161 |
|
162 |
-
|
163 |
-
# Text-only conditioning is also supported without the need to pass `conditions`
|
164 |
-
expected_height, expected_width = height, width
|
165 |
downscale_factor = 2 / 3
|
166 |
downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
|
167 |
downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
|
168 |
-
|
169 |
timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
|
170 |
timesteps_second_pass = [1000, 909, 725, 421]
|
171 |
-
if steps == 8:
|
172 |
timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
|
173 |
timesteps_second_pass = [1000, 909, 725, 421, 0]
|
174 |
-
elif 7 < steps < 8:
|
175 |
-
timesteps_first_pass = None
|
176 |
timesteps_second_pass = None
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
prompt=prompt,
|
181 |
-
negative_prompt=negative_prompt,
|
182 |
-
width=downscaled_width,
|
183 |
-
height=downscaled_height,
|
184 |
-
num_frames=num_frames,
|
185 |
-
num_inference_steps=steps,
|
186 |
-
decode_timestep = 0.05,
|
187 |
-
decode_noise_scale = 0.025,
|
188 |
-
timesteps = timesteps_first_pass, #[1.0000, 0.9937, 0.9875, 0.9812, 0.9750, 0.9094, 0.7250]
|
189 |
-
image_cond_noise_scale=0.0, # testing
|
190 |
-
guidance_rescale=0.7,# testing
|
191 |
-
guidance_scale=guidance_scale,
|
192 |
-
generator=torch.Generator(device="cuda").manual_seed(seed),
|
193 |
-
output_type="latent",
|
194 |
-
).frames
|
195 |
-
|
196 |
-
|
197 |
-
# Part 2. Upscale generated video using latent upsampler with fewer inference steps
|
198 |
-
# The available latent upsampler upscales the height/width by 2x
|
199 |
-
if improve_texture:
|
200 |
-
upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
|
201 |
-
upscaled_latents = pipe_upsample(
|
202 |
-
latents=latents,
|
203 |
-
adain_factor=1.0,
|
204 |
-
output_type="latent"
|
205 |
-
).frames
|
206 |
-
|
207 |
-
# Part 3. Denoise the upscaled video with few steps to improve texture (optional, but recommended)
|
208 |
-
video = pipe(
|
209 |
conditions=condition1,
|
210 |
-
prompt=prompt
|
211 |
negative_prompt=negative_prompt,
|
212 |
-
width=
|
213 |
-
height=
|
214 |
num_frames=num_frames,
|
215 |
-
|
216 |
-
denoise_strength=0.999, # Effectively, 3 inference steps out of 4
|
217 |
-
timesteps= timesteps_second_pass,
|
218 |
-
num_inference_steps=10,
|
219 |
-
latents=upscaled_latents,
|
220 |
decode_timestep=0.05,
|
221 |
decode_noise_scale=0.025,
|
|
|
222 |
image_cond_noise_scale=0.0,
|
223 |
guidance_rescale=0.7,
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
)
|
233 |
|
234 |
-
#
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
for frame_idx in range(len(video_frames)):
|
243 |
-
progress(frame_idx / len(video_frames), desc="Saving video (fallback ffmpeg)")
|
244 |
-
writer.append_data(video_frames[frame_idx])
|
245 |
|
246 |
-
return
|
247 |
-
|
248 |
-
|
249 |
|
|
|
250 |
css="""
|
251 |
-
#col-container {
|
252 |
-
|
253 |
-
|
254 |
-
}
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
const url = new URL(window.location);
|
260 |
-
|
261 |
-
if (url.searchParams.get('__theme') !== 'dark') {
|
262 |
-
url.searchParams.set('__theme', 'dark');
|
263 |
-
window.location.href = url.href;
|
264 |
-
}
|
265 |
-
}
|
266 |
"""
|
267 |
|
268 |
with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
|
269 |
-
|
270 |
-
|
271 |
-
gr.Markdown("Fast high quality video generation. [Model](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltxv-13b-0.9.7-distilled.safetensors) [GitHub](https://github.com/Lightricks/LTX-Video) [Diffusers](#)")
|
272 |
|
273 |
-
|
274 |
-
|
|
|
|
|
275 |
with gr.Tab("image-to-video") as image_tab:
|
276 |
with gr.Group():
|
277 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
278 |
-
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "
|
279 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
280 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
281 |
with gr.Tab("text-to-video") as text_tab:
|
@@ -288,108 +537,101 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
|
|
288 |
with gr.Group():
|
289 |
image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
|
290 |
video_v2v = gr.Video(label="Input Video")
|
291 |
-
|
292 |
v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
|
293 |
v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
|
294 |
|
295 |
-
|
296 |
-
label="Video Duration (seconds)",
|
297 |
-
|
298 |
-
maximum=8.5,
|
299 |
-
value=2,
|
300 |
-
step=0.1,
|
301 |
-
info=f"Target video duration (0.3s to 8.5s)"
|
302 |
)
|
303 |
-
|
304 |
-
|
305 |
-
with gr.Column():
|
306 |
-
|
307 |
gr.DeepLinkButton()
|
308 |
-
|
309 |
-
|
310 |
-
with gr.Accordion("Advanced settings", open=False):
|
311 |
-
mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
|
312 |
-
negative_prompt = gr.Textbox(label="negative prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
|
313 |
-
with gr.Row():
|
314 |
-
seed = gr.Number(label="seed", value=0, precision=0)
|
315 |
-
randomize_seed = gr.Checkbox(label="randomize seed")
|
316 |
-
with gr.Row():
|
317 |
-
guidance_scale= gr.Slider(label="guidance scale", minimum=0, maximum=10, value=1, step=1)
|
318 |
-
steps = gr.Slider(label="Steps", minimum=1, maximum=30, value=7, step=1)
|
319 |
-
num_frames = gr.Slider(label="# frames", minimum=1, maximum=161, value=96, step=1, visible=False)
|
320 |
-
with gr.Row():
|
321 |
-
height = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
|
322 |
-
width = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
|
323 |
-
|
324 |
-
|
325 |
-
image_i2v.upload(
|
326 |
-
fn=handle_image_upload_for_dims,
|
327 |
-
inputs=[image_i2v, height, width],
|
328 |
-
outputs=[height, width]
|
329 |
-
)
|
330 |
-
video_v2v.upload(
|
331 |
-
fn=handle_video_upload_for_dims,
|
332 |
-
inputs=[video_v2v, height, width],
|
333 |
-
outputs=[height, width]
|
334 |
-
)
|
335 |
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
)
|
340 |
-
|
341 |
-
|
342 |
-
|
|
|
343 |
)
|
344 |
-
|
345 |
-
|
346 |
-
outputs=[
|
347 |
)
|
348 |
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
num_frames,
|
374 |
-
frames_to_use,
|
375 |
-
seed,
|
376 |
-
randomize_seed,guidance_scale, duration_input, improve_texture],
|
377 |
-
outputs=[output])
|
378 |
-
|
379 |
-
v2v_button.click(fn=generate,
|
380 |
-
inputs=[v2v_prompt,
|
381 |
-
negative_prompt,
|
382 |
-
image_v_hidden,
|
383 |
-
video_v2v,
|
384 |
-
height,
|
385 |
-
width,
|
386 |
-
mode,
|
387 |
-
steps,
|
388 |
-
num_frames,
|
389 |
-
frames_to_use,
|
390 |
-
seed,
|
391 |
-
randomize_seed,guidance_scale, duration_input, improve_texture],
|
392 |
-
outputs=[output])
|
393 |
-
|
394 |
-
|
395 |
-
demo.launch()
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
+
import json
|
4 |
+
import logging # Not strictly used from app (2) but good practice
|
5 |
import torch
|
6 |
+
from PIL import Image
|
7 |
+
import spaces
|
8 |
from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
|
9 |
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
|
10 |
+
from diffusers.utils import export_to_video, load_video, load_image # load_image was also in app (2)
|
11 |
+
from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download
|
12 |
+
import copy # Not strictly used from app (2) but kept if needed later
|
13 |
import random
|
14 |
import numpy as np
|
15 |
import imageio
|
16 |
+
import time
|
17 |
+
import re
|
18 |
+
|
19 |
+
# --- LoRA related: Load LoRAs from JSON file ---
|
20 |
+
try:
|
21 |
+
with open('loras.json', 'r') as f:
|
22 |
+
loras = json.load(f)
|
23 |
+
except FileNotFoundError:
|
24 |
+
print("WARNING: loras.json not found. LoRA gallery will be empty or non-functional.")
|
25 |
+
print("Please create loras.json with entries like: [{'title': 'My LTX LoRA', 'repo': 'user/repo', 'weights': 'lora.safetensors', 'trigger_word': 'my style', 'image': 'url_to_image.jpg'}]")
|
26 |
+
loras = []
|
27 |
+
except json.JSONDecodeError:
|
28 |
+
print("WARNING: loras.json is not valid JSON. LoRA gallery will be empty or non-functional.")
|
29 |
+
loras = []
|
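# Illustration only (not part of the original repo): based on the schema hinted at in the
# warning messages above, a minimal loras.json could look like the following; every value
# here is a placeholder taken from that example message.
# [
#   {
#     "title": "My LTX LoRA",
#     "repo": "user/repo",
#     "weights": "lora.safetensors",
#     "trigger_word": "my style",
#     "image": "url_to_image.jpg"
#   }
# ]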
30 |
+
|
31 |
+
|
32 |
+
# Initialize the base model
|
33 |
+
dtype = torch.bfloat16 # Assuming LTX uses bfloat16 as per original app (1)
|
34 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
35 |
+
|
36 |
+
# --- Original app (1) pipeline setup ---
|
37 |
+
pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=dtype)
|
38 |
+
pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=dtype)
|
39 |
+
pipe.to(device)
|
40 |
+
pipe_upsample.to(device)
|
41 |
pipe.vae.enable_tiling()
|
42 |
|
43 |
+
MAX_SEED = np.iinfo(np.int32).max # from app (1)
|
44 |
+
# MAX_SEED_APP2 = 2**32-1 # from app (2), slightly different, stick to app (1)'s for consistency here.
|
45 |
+
|
46 |
MAX_IMAGE_SIZE = 1280
|
47 |
MAX_NUM_FRAMES = 257
|
48 |
+
FPS = 30.0
|
49 |
+
MIN_DIM_SLIDER = 256
|
50 |
+
TARGET_FIXED_SIDE = 768
|
51 |
+
|
52 |
+
|
53 |
+
class calculateDuration:
|
54 |
+
def __init__(self, activity_name=""):
|
55 |
+
self.activity_name = activity_name
|
56 |
+
|
57 |
+
def __enter__(self):
|
58 |
+
self.start_time = time.time()
|
59 |
+
return self
|
60 |
+
|
61 |
+
def __exit__(self, exc_type, exc_value, traceback):
|
62 |
+
self.end_time = time.time()
|
63 |
+
self.elapsed_time = self.end_time - self.start_time
|
64 |
+
if self.activity_name:
|
65 |
+
print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
|
66 |
+
else:
|
67 |
+
print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
|
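# Usage sketch (mirrors how the class is used further down in this file):
#   with calculateDuration("Loading LoRA weights"):
#       pipe.load_lora_weights(...)
# On exit this prints e.g. "Elapsed time for Loading LoRA weights: 1.234567 seconds".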
68 |
+
|
69 |
+
|
70 |
+
def update_lora_selection(evt: gr.SelectData):
|
71 |
+
if not loras or evt.index is None or evt.index >= len(loras):
|
72 |
+
return gr.update(), None # No update to markdown, no selected index
|
73 |
+
selected_lora_item = loras[evt.index]
|
74 |
+
# new_placeholder = f"Type a prompt for {selected_lora_item['title']}" # Not updating placeholders directly
|
75 |
+
lora_repo = selected_lora_item["repo"]
|
76 |
+
updated_text = f"### Selected LoRA: [{selected_lora_item['title']}](https://huggingface.co/{lora_repo}) ✨"
|
77 |
+
if selected_lora_item.get('trigger_word'):
|
78 |
+
updated_text += f"\nTrigger word: `{selected_lora_item['trigger_word']}`"
|
79 |
+
# No width/height adjustment to avoid conflict with app (1)'s logic
|
80 |
+
return (
|
81 |
+
# gr.update(placeholder=new_placeholder), # Not changing prompt placeholder
|
82 |
+
updated_text,
|
83 |
+
evt.index,
|
84 |
+
)
|
85 |
+
|
86 |
+
def get_huggingface_safetensors_for_ltx(link): # Renamed for clarity
|
87 |
+
split_link = link.split("/")
|
88 |
+
if len(split_link) != 2:
|
89 |
+
raise Exception("Invalid Hugging Face repository link format. Should be 'username/repository_name'.")
|
90 |
+
|
91 |
+
print(f"Repository attempted: {link}") # Use the combined link
|
92 |
+
|
93 |
+
model_card = ModelCard.load(link) # link is "username/repository_name"
|
94 |
+
base_model = model_card.data.get("base_model")
|
95 |
+
print(f"Base model from card: {base_model}")
|
96 |
+
|
97 |
+
# Validate model type for LTX
|
98 |
+
acceptable_models = {"Lightricks/LTX-Video-0.9.7-distilled"} # Key line for LTX compatibility
|
99 |
+
|
100 |
+
models_to_check = base_model if isinstance(base_model, list) else [base_model]
|
101 |
+
|
102 |
+
if not any(str(model).strip() in acceptable_models for model in models_to_check): # Ensure string comparison
|
103 |
+
raise Exception(f"Not a LoRA for a compatible LTX base model! Expected one of {acceptable_models}, found {models_to_check}")
|
104 |
+
|
105 |
+
image_path = None
|
106 |
+
if model_card.data.get("widget") and isinstance(model_card.data["widget"], list) and len(model_card.data["widget"]) > 0:
|
107 |
+
image_path = model_card.data["widget"][0].get("output", {}).get("url", None)
|
108 |
+
|
109 |
+
trigger_word = model_card.data.get("instance_prompt", "")
|
110 |
+
image_url = f"https://huggingface.co/{link}/resolve/main/{image_path}" if image_path else None
|
111 |
+
|
112 |
+
fs = HfFileSystem()
|
113 |
+
try:
|
114 |
+
list_of_files = fs.ls(link, detail=False)
|
115 |
+
safetensors_name = None
|
116 |
+
# Simplified logic: find first .safetensors, or prioritize specific names if needed
|
117 |
+
# For LoRAs, usually there's one main .safetensors file.
|
118 |
+
# The complex step-based selection from app(2) might be overkill unless LTX LoRAs follow that pattern.
|
119 |
+
|
120 |
+
# Prioritize files common for LoRAs
|
121 |
+
common_lora_filenames = ["lora.safetensors", "pytorch_lora_weights.safetensors"]
|
122 |
+
for f_common in common_lora_filenames:
|
123 |
+
if f"{link}/{f_common}" in list_of_files:
|
124 |
+
safetensors_name = f_common
|
125 |
+
break
|
126 |
+
|
127 |
+
if not safetensors_name: # Fallback to first .safetensors
|
128 |
+
for file_path in list_of_files:
|
129 |
+
filename = file_path.split("/")[-1]
|
130 |
+
if filename.endswith(".safetensors"):
|
131 |
+
safetensors_name = filename
|
132 |
+
break
|
133 |
+
|
134 |
+
if not safetensors_name: # If still not found, then raise error
|
135 |
+
raise Exception("No valid *.safetensors file found in the repository.")
|
136 |
+
|
137 |
+
if not image_url: # Fallback image search
|
138 |
+
for file_path in list_of_files:
|
139 |
+
filename = file_path.split("/")[-1]
|
140 |
+
if filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
|
141 |
+
image_url = f"https://huggingface.co/{link}/resolve/main/{filename}"
|
142 |
+
break
|
143 |
+
|
144 |
+
except Exception as e:
|
145 |
+
print(f"Error accessing repository or finding safetensors: {e}")
|
146 |
+
raise Exception(f"Could not validate Hugging Face repository '{link}' or find a .safetensors LoRA file.") from e
|
147 |
+
|
148 |
+
# split_link[0] is user, split_link[1] is repo_name
|
149 |
+
return split_link[1], link, safetensors_name, trigger_word, image_url
|
150 |
+
|
151 |
+
|
152 |
+
def check_custom_model_for_ltx(link_input): # Renamed for clarity
|
153 |
+
print(f"Checking a custom model on: {link_input}")
|
154 |
+
if not link_input or not isinstance(link_input, str):
|
155 |
+
raise Exception("Invalid custom LoRA input. Please provide a Hugging Face repository path (e.g., 'username/repo-name') or URL.")
|
156 |
+
|
157 |
+
link_to_check = link_input.strip()
|
158 |
+
if link_to_check.startswith("https://huggingface.co/"):
|
159 |
+
link_to_check = link_to_check.replace("https://huggingface.co/", "").split("?")[0] # Remove base URL and query params
|
160 |
+
elif link_to_check.startswith("www.huggingface.co/"):
|
161 |
+
link_to_check = link_to_check.replace("www.huggingface.co/", "").split("?")[0]
|
162 |
+
|
163 |
+
# Basic check for 'user/repo' format
|
164 |
+
if '/' not in link_to_check or len(link_to_check.split('/')) != 2:
|
165 |
+
raise Exception("Invalid Hugging Face repository path. Use 'username/repo-name' format.")
|
166 |
+
|
167 |
+
return get_huggingface_safetensors_for_ltx(link_to_check)
|
168 |
+
|
169 |
+
def add_custom_lora_for_ltx(custom_lora_path_input): # Renamed for clarity
|
170 |
+
global loras # To modify the global loras list
|
171 |
+
if custom_lora_path_input:
|
172 |
+
try:
|
173 |
+
title, repo_id, weights_filename, trigger_word, image_url = check_custom_model_for_ltx(custom_lora_path_input)
|
174 |
+
print(f"Loaded custom LoRA: {repo_id}")
|
175 |
+
|
176 |
+
# Create HTML card for display
|
177 |
+
card_html = f'''
|
178 |
+
<div class="custom_lora_card">
|
179 |
+
<span>Loaded custom LoRA:</span>
|
180 |
+
<div class="card_internal">
|
181 |
+
<img src="{image_url if image_url else 'https://huggingface.co/front/assets/huggingface_logo-noborder.svg'}" alt="{title}" style="width:80px; height:80px; object-fit:cover;" />
|
182 |
+
<div>
|
183 |
+
<h4>{title}</h4>
|
184 |
+
<small>Repo: {repo_id}<br>Weights: {weights_filename}<br>
|
185 |
+
{"Trigger: <code><b>"+trigger_word+"</code></b>" if trigger_word else "No trigger word found. If one is needed, include it in your prompt."}
|
186 |
+
</small>
|
187 |
+
</div>
|
188 |
+
</div>
|
189 |
+
</div>
|
190 |
+
'''
|
191 |
+
|
192 |
+
# Check if this LoRA (by repo_id) already exists
|
193 |
+
existing_item_index = next((index for (index, item) in enumerate(loras) if item['repo'] == repo_id), None)
|
194 |
+
|
195 |
+
new_item_data = {
|
196 |
+
"image": image_url,
|
197 |
+
"title": title,
|
198 |
+
"repo": repo_id,
|
199 |
+
"weights": weights_filename,
|
200 |
+
"trigger_word": trigger_word,
|
201 |
+
"custom": True # Mark as custom
|
202 |
+
}
|
203 |
+
|
204 |
+
if existing_item_index is not None:
|
205 |
+
loras[existing_item_index] = new_item_data # Update existing
|
206 |
+
else:
|
207 |
+
loras.append(new_item_data)
|
208 |
+
existing_item_index = len(loras) - 1
|
209 |
+
|
210 |
+
# Update gallery choices
|
211 |
+
gallery_choices = [(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras]
|
212 |
+
|
213 |
+
return (
|
214 |
+
gr.update(visible=True, value=card_html),
|
215 |
+
gr.update(visible=True), # Show remove button
|
216 |
+
gr.update(choices=gallery_choices, value=None), # Update gallery, deselect
|
217 |
+
f"Custom LoRA '{title}' added. Select it from the gallery.", # Selected info text
|
218 |
+
None, # Reset selected_index state
|
219 |
+
"" # Clear custom LoRA input textbox
|
220 |
+
)
|
221 |
+
|
222 |
+
except Exception as e:
|
223 |
+
gr.Warning(f"Invalid Custom LoRA: {e}")
|
224 |
+
return gr.update(visible=True, value=f"<p style='color:red;'>Error adding LoRA: {e}</p>"), gr.update(visible=False), gr.update(), "", None, custom_lora_path_input
|
225 |
+
else: # No input
|
226 |
+
return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
|
227 |
+
|
228 |
+
def remove_custom_lora_for_ltx(): # Renamed for clarity
|
229 |
+
global loras
|
230 |
+
# Remove the last added custom LoRA if it's marked (simplistic: assumes one custom at a time or last one)
|
231 |
+
# A more robust way would be to track the index of the custom LoRA being displayed.
|
232 |
+
# For now, let's find the *last* custom LoRA and remove it.
|
233 |
+
custom_lora_indices = [i for i, item in enumerate(loras) if item.get("custom")]
|
234 |
+
if custom_lora_indices:
|
235 |
+
loras.pop(custom_lora_indices[-1]) # Remove the last one marked as custom
|
236 |
+
|
237 |
+
gallery_choices = [(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras]
|
238 |
+
return gr.update(visible=False, value=""), gr.update(visible=False), gr.update(choices=gallery_choices, value=None), "", None, ""
|
239 |
+
|
240 |
|
241 |
def round_to_nearest_resolution_acceptable_by_vae(height, width):
|
|
|
242 |
height = height - (height % pipe.vae_spatial_compression_ratio)
|
243 |
width = width - (width % pipe.vae_spatial_compression_ratio)
|
|
|
244 |
return height, width
|
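# Worked example (assuming pipe.vae_spatial_compression_ratio == 32, as the "divisible by 32"
# slider hints elsewhere in this file suggest):
#   round_to_nearest_resolution_acceptable_by_vae(721, 1025) -> (704, 1024)
#   since 721 - (721 % 32) = 704 and 1025 - (1025 % 32) = 1024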
245 |
|
246 |
+
def calculate_new_dimensions(orig_w, orig_h):
|
247 |
+
"""Calculates new dimensions maintaining aspect ratio with one side fixed to TARGET_FIXED_SIDE."""
|
248 |
+
if orig_w == 0 or orig_h == 0: return MIN_DIM_SLIDER, MIN_DIM_SLIDER # Avoid division by zero
|
249 |
+
|
250 |
+
if orig_w > orig_h: # Landscape or square
|
251 |
+
new_w = TARGET_FIXED_SIDE
|
252 |
+
new_h = int(TARGET_FIXED_SIDE * orig_h / orig_w)
|
253 |
+
else: # Portrait
|
254 |
+
new_h = TARGET_FIXED_SIDE
|
255 |
+
new_w = int(TARGET_FIXED_SIDE * orig_w / orig_h)
|
256 |
+
|
257 |
+
# Ensure dimensions are at least MIN_DIM_SLIDER
|
258 |
+
new_w = max(MIN_DIM_SLIDER, new_w)
|
259 |
+
new_h = max(MIN_DIM_SLIDER, new_h)
|
260 |
+
|
261 |
+
# Ensure divisibility by VAE compression ratio (e.g., 32)
|
262 |
+
new_h, new_w = round_to_nearest_resolution_acceptable_by_vae(new_h, new_w)
|
263 |
+
return new_h, new_w
|
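# Worked example (again assuming a VAE spatial compression ratio of 32):
#   calculate_new_dimensions(1920, 1080)  # landscape input
#     new_w = TARGET_FIXED_SIDE = 768, new_h = int(768 * 1080 / 1920) = 432
#     both already >= MIN_DIM_SLIDER, then rounded down to multiples of 32 -> returns (416, 768)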
264 |
+
|
265 |
def handle_image_upload_for_dims(image_filepath, current_h, current_w):
|
266 |
+
if not image_filepath:
|
|
|
267 |
return gr.update(value=current_h), gr.update(value=current_w)
|
268 |
try:
|
269 |
img = Image.open(image_filepath)
|
|
|
272 |
return gr.update(value=new_h), gr.update(value=new_w)
|
273 |
except Exception as e:
|
274 |
print(f"Error processing image for dimension update: {e}")
|
|
|
275 |
return gr.update(value=current_h), gr.update(value=current_w)
|
276 |
|
277 |
def handle_video_upload_for_dims(video_filepath, current_h, current_w):
|
278 |
+
if not video_filepath:
|
279 |
return gr.update(value=current_h), gr.update(value=current_w)
|
280 |
try:
|
281 |
+
video_filepath_str = str(video_filepath)
|
|
|
282 |
if not os.path.exists(video_filepath_str):
|
283 |
print(f"Video file path does not exist for dimension update: {video_filepath_str}")
|
284 |
return gr.update(value=current_h), gr.update(value=current_w)
|
|
|
289 |
if 'size' in meta:
|
290 |
orig_w, orig_h = meta['size']
|
291 |
else:
|
|
|
292 |
try:
|
293 |
first_frame = reader.get_data(0)
|
|
|
294 |
orig_h, orig_w = first_frame.shape[0], first_frame.shape[1]
|
295 |
except Exception as e_frame:
|
296 |
print(f"Could not get video size from metadata or first frame: {e_frame}")
|
297 |
return gr.update(value=current_h), gr.update(value=current_w)
|
298 |
|
299 |
+
if orig_w == -1 or orig_h == -1:
|
300 |
print(f"Could not determine dimensions for video: {video_filepath_str}")
|
301 |
return gr.update(value=current_h), gr.update(value=current_w)
|
302 |
|
303 |
new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
|
304 |
return gr.update(value=new_h), gr.update(value=new_w)
|
305 |
except Exception as e:
|
|
|
306 |
print(f"Error processing video for dimension update: {e} (Path: {video_filepath}, Type: {type(video_filepath)})")
|
307 |
return gr.update(value=current_h), gr.update(value=current_w)
|
308 |
|
309 |
+
def update_task_image(): return "image-to-video"
|
310 |
+
def update_task_text(): return "text-to-video"
|
311 |
+
def update_task_video(): return "video-to-video"
|
312 |
|
313 |
+
def get_duration(prompt, negative_prompt, image, video, height, width, mode, steps, num_frames,
|
314 |
+
frames_to_use, seed, randomize_seed, guidance_scale, duration_input, improve_texture,
|
315 |
+
# New LoRA params
|
316 |
+
selected_lora_index, lora_scale_value,
|
317 |
+
progress): # Add selected_lora_index and lora_scale_value if they affect duration
|
318 |
if duration_input > 7:
|
319 |
return 75
|
320 |
else:
|
321 |
return 60
|
322 |
+
|
323 |
+
@spaces.GPU(duration=get_duration) # Needs selected_lora_index and lora_scale_value if get_duration uses them
|
324 |
def generate(prompt,
|
325 |
negative_prompt,
|
326 |
image,
|
|
|
329 |
width,
|
330 |
mode,
|
331 |
steps,
|
332 |
+
num_frames_slider_val, # Renamed to avoid conflict with internal num_frames
|
333 |
frames_to_use,
|
334 |
seed,
|
335 |
randomize_seed,
|
336 |
guidance_scale,
|
337 |
duration_input,
|
338 |
+
improve_texture=False,
|
339 |
+
# New LoRA params
|
340 |
+
selected_lora_index=None,
|
341 |
+
lora_scale_value=0.8, # Default LoRA scale
|
342 |
+
progress=gr.Progress(track_tqdm=True)):
|
343 |
+
|
344 |
+
effective_prompt = prompt
|
345 |
|
346 |
+
# --- LoRA Handling ---
|
347 |
+
# Unload any existing LoRAs from main pipes first to prevent conflicts
|
348 |
+
# This should ideally be more granular if LoRAs are very large or loading is slow.
|
349 |
+
with calculateDuration("Unloading previous LoRAs"):
|
350 |
+
try:
|
351 |
+
pipe.unload_lora_weights()
|
352 |
+
print("Previous LoRAs unloaded if any.")
|
353 |
+
except Exception as e:
|
354 |
+
print(f"Note: Could not unload LoRAs (maybe none were loaded): {e}")
|
355 |
+
|
356 |
+
if selected_lora_index is not None and 0 <= selected_lora_index < len(loras):
|
357 |
+
selected_lora_data = loras[selected_lora_index]
|
358 |
+
lora_repo_id = selected_lora_data["repo"]
|
359 |
+
lora_weights_name = selected_lora_data.get("weights", None)
|
360 |
+
lora_trigger = selected_lora_data.get("trigger_word", "")
|
361 |
+
|
362 |
+
print(f"Selected LoRA: {selected_lora_data['title']} from {lora_repo_id}")
|
363 |
+
if lora_trigger:
|
364 |
+
print(f"Applying trigger word: {lora_trigger}")
|
365 |
+
|
366 |
+
if selected_lora_data.get("trigger_position") == "prepend":
|
367 |
+
effective_prompt = f"{lora_trigger} {prompt}"
|
368 |
+
else: # Default to append or if not specified
|
369 |
+
effective_prompt = f"{prompt} {lora_trigger}"
|
370 |
+
|
371 |
+
with calculateDuration(f"Loading LoRA weights for {selected_lora_data['title']}"):
|
372 |
+
try:
|
373 |
+
# Load into main generation pipe
|
374 |
+
pipe.load_lora_weights(
|
375 |
+
lora_repo_id,
|
376 |
+
weight_name=lora_weights_name,
|
377 |
+
adapter_name="active_lora" # Use a consistent adapter name
|
378 |
+
)
|
379 |
+
pipe.set_adapters(["active_lora"], adapter_weights=[lora_scale_value])
|
380 |
+
print(f"LoRA loaded into main pipe with scale {lora_scale_value}")
|
381 |
+
|
382 |
+
except Exception as e:
|
383 |
+
gr.Warning(f"Failed to load LoRA '{selected_lora_data['title']}': {e}. Proceeding without LoRA.")
|
384 |
+
print(f"Error loading LoRA: {e}")
|
385 |
+
# Ensure pipes are clean if loading failed mid-way (though unload_lora_weights should handle this)
|
386 |
+
try:
|
387 |
+
pipe.unload_lora_weights()
|
388 |
+
except: pass # Ignore errors here
|
389 |
+
else:
|
390 |
+
print("No LoRA selected or invalid index.")
|
391 |
+
# --- End LoRA Handling ---
|
392 |
+
|
393 |
if randomize_seed:
|
394 |
seed = random.randint(0, MAX_SEED)
|
395 |
|
|
|
396 |
target_frames_ideal = duration_input * FPS
|
397 |
target_frames_rounded = round(target_frames_ideal)
|
398 |
+
if target_frames_rounded < 1: target_frames_rounded = 1
|
|
|
399 |
|
400 |
n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
|
401 |
actual_num_frames = int(n_val * 8 + 1)
|
|
|
402 |
actual_num_frames = max(9, actual_num_frames)
|
403 |
+
num_frames = min(MAX_NUM_FRAMES, actual_num_frames) # This num_frames is used by the pipe
|
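# Worked example of the duration-to-frames mapping above, at FPS = 30:
#   duration_input = 2.0 s -> target_frames_rounded = 60
#   n_val = round((60 - 1) / 8) = 7, actual_num_frames = 7 * 8 + 1 = 57
#   clamped to [9, MAX_NUM_FRAMES] -> num_frames = 57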
404 |
|
405 |
if mode == "video-to-video" and (video is not None):
|
406 |
+
loaded_video_frames = load_video(video)[:frames_to_use]
|
407 |
+
condition_input_video = True
|
408 |
+
width, height = loaded_video_frames[0].size
|
409 |
+
# steps = 4 # This was hardcoded, let user control steps
|
410 |
+
elif mode == "image-to-video" and (image is not None):
|
411 |
+
loaded_video_frames = [load_image(image)]
|
412 |
+
width, height = loaded_video_frames[0].size
|
413 |
+
condition_input_video = True
|
414 |
+
else: # text-to-video
|
415 |
+
condition_input_video=False
|
416 |
+
loaded_video_frames = None # No video frames for pure t2v
|
417 |
+
|
418 |
+
if condition_input_video and loaded_video_frames:
|
419 |
+
condition1 = LTXVideoCondition(video=loaded_video_frames, frame_index=0)
|
420 |
else:
|
421 |
condition1 = None
|
422 |
|
423 |
+
expected_height, expected_width = height, width
|
|
|
|
|
424 |
downscale_factor = 2 / 3
|
425 |
downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
|
426 |
downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
|
427 |
+
|
428 |
timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
|
429 |
timesteps_second_pass = [1000, 909, 725, 421]
|
430 |
+
if steps == 8:
|
431 |
timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
|
432 |
timesteps_second_pass = [1000, 909, 725, 421, 0]
|
433 |
+
elif 7 < steps < 8: # Non-integer steps could be an issue for these pre-defined timesteps
|
434 |
+
timesteps_first_pass = None
|
435 |
timesteps_second_pass = None
|
436 |
|
437 |
+
with calculateDuration("Main pipe generation"):
|
438 |
+
latents = pipe(
|
439 |
conditions=condition1,
|
440 |
+
prompt=effective_prompt, # Use prompt with trigger word
|
441 |
negative_prompt=negative_prompt,
|
442 |
+
width=downscaled_width,
|
443 |
+
height=downscaled_height,
|
444 |
num_frames=num_frames,
|
445 |
+
num_inference_steps=steps,
|
446 |
decode_timestep=0.05,
|
447 |
decode_noise_scale=0.025,
|
448 |
+
timesteps=timesteps_first_pass,
|
449 |
image_cond_noise_scale=0.0,
|
450 |
guidance_rescale=0.7,
|
451 |
+
guidance_scale=guidance_scale,
|
452 |
+
generator=torch.Generator(device=device).manual_seed(seed),
|
453 |
+
output_type="latent",
|
454 |
+
).frames
|
455 |
+
|
456 |
+
final_video_frames_np = None # Initialize
|
457 |
+
if improve_texture:
|
458 |
+
upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2 # These are internal, not user-facing W/H
|
459 |
+
with calculateDuration("Latent upscaling"):
|
460 |
+
upscaled_latents = pipe_upsample(
|
461 |
+
latents=latents,
|
462 |
+
adain_factor=1.0,
|
463 |
+
output_type="latent"
|
464 |
+
).frames
|
465 |
+
|
466 |
+
with calculateDuration("Denoising upscaled video"):
|
467 |
+
final_video_frames_np = pipe( # Using main pipe for denoising
|
468 |
+
conditions=condition1, # Re-pass condition if applicable
|
469 |
+
prompt=effective_prompt,
|
470 |
+
negative_prompt=negative_prompt,
|
471 |
+
width=upscaled_width, # Use upscaled dimensions for this pass
|
472 |
+
height=upscaled_height,
|
473 |
+
num_frames=num_frames,
|
474 |
+
guidance_scale=guidance_scale,
|
475 |
+
denoise_strength=0.999,
|
476 |
+
timesteps=timesteps_second_pass,
|
477 |
+
num_inference_steps=10, # Or make this configurable
|
478 |
+
latents=upscaled_latents,
|
479 |
+
decode_timestep=0.05,
|
480 |
+
decode_noise_scale=0.025,
|
481 |
+
image_cond_noise_scale=0.0,
|
482 |
+
guidance_rescale=0.7,
|
483 |
+
generator=torch.Generator(device=device).manual_seed(seed),
|
484 |
+
output_type="np",
|
485 |
+
).frames[0]
|
486 |
+
else: # No texture improvement, just upscale latents and decode
|
487 |
+
with calculateDuration("Latent upscaling and decoding (no improve_texture)"):
|
488 |
+
final_video_frames_np = pipe_upsample(
|
489 |
+
latents=latents,
|
490 |
+
output_type="np" # Decode directly
|
491 |
+
).frames[0]
|
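# Flow recap (from the code above): the first pass renders latents at roughly 2/3 of the
# requested resolution, the latent upsampler doubles height/width, and, when improve_texture
# is enabled, a short second denoising pass with timesteps_second_pass refines detail before
# decoding to numpy frames.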
492 |
|
493 |
+
# Video saving
|
494 |
+
video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
|
495 |
+
output_filename = "output.mp4"
|
496 |
+
with calculateDuration("Saving video to mp4"):
|
497 |
+
with imageio.get_writer(output_filename, fps=FPS, quality=8, macro_block_size=1) as writer: # Removed bitrate=None
|
498 |
+
for frame_idx, frame_data in enumerate(video_uint8_frames):
|
499 |
+
progress((frame_idx + 1) / len(video_uint8_frames), desc="Encoding video frames...")
|
500 |
+
writer.append_data(frame_data)
|
501 |
|
502 |
+
return output_filename, seed # Return seed for display
|
|
|
|
|
503 |
|
504 |
+
# --- Gradio UI ---
|
505 |
css="""
|
506 |
+
#col-container { margin: 0 auto; max-width: 1000px; } /* Increased max-width for gallery */
|
507 |
+
#gallery .grid-wrap{height: 20vh !important; max-height: 250px !important;} /* From app (2), adjusted height */
|
508 |
+
.custom_lora_card { border: 1px solid #e0e0e0; border-radius: 8px; padding: 10px; margin-top: 10px; background-color: #f9f9f9; }
|
509 |
+
.card_internal { display: flex; align-items: center; }
|
510 |
+
.card_internal img { margin-right: 1em; border-radius: 4px; }
|
511 |
+
.card_internal div h4 { margin-bottom: 0.2em; }
|
512 |
+
.card_internal div small { font-size: 0.9em; color: #555; }
|
513 |
+
#lora_list_link { font-size: 90%; background: var(--block-background-fill); padding: 0.5em 1em; border-radius: 8px; display:inline-block; margin-top:10px;}
|
514 |
"""
|
515 |
|
516 |
with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
|
517 |
+
gr.Markdown("# LTX Video 0.9.7 Distilled with LoRA Explorer")
|
518 |
+
gr.Markdown("Fast high quality video generation with custom LoRA support. [Model](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltxv-13b-0.9.7-distilled.safetensors) [GitHub](https://github.com/Lightricks/LTX-Video)")
|
|
|
519 |
|
520 |
+
selected_lora_index_state = gr.State(None)
|
521 |
+
|
522 |
+
with gr.Row():
|
523 |
+
with gr.Column(scale=2): # Main controls
|
524 |
with gr.Tab("image-to-video") as image_tab:
|
525 |
with gr.Group():
|
526 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
527 |
+
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "clipboard"]) # Removed webcam
|
528 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
529 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
530 |
with gr.Tab("text-to-video") as text_tab:
|
|
|
537 |
with gr.Group():
|
538 |
image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
|
539 |
video_v2v = gr.Video(label="Input Video")
|
540 |
+
frames_to_use_slider = gr.Slider(label="Frames to use from input video", minimum=9, maximum=MAX_NUM_FRAMES, value=9, step=8, info="Number of initial frames for conditioning. Must be N*8+1.")
|
541 |
v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
|
542 |
v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
|
543 |
|
544 |
+
duration_slider = gr.Slider(
|
545 |
+
label="Video Duration (seconds)", minimum=0.3, maximum=8.5, value=2, step=0.1,
|
546 |
+
info="Target video duration (0.3s to 8.5s). Actual frames depend on model constraints (multiple of 8 + 1)."
|
547 |
)
|
548 |
+
improve_texture_checkbox = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, info="Uses a two-pass generation for better quality, but is slower.")
|
549 |
+
|
550 |
+
with gr.Column(scale=1): # LoRA Gallery and Output
|
551 |
+
selected_lora_info_markdown = gr.Markdown("No LoRA selected.")
|
552 |
+
lora_gallery_display = gr.Gallery(
|
553 |
+
# Ensure loras is a list of (image_url, title) tuples or similar
|
554 |
+
value=[(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras] if loras else [],
|
555 |
+
label="LoRA Gallery",
|
556 |
+
allow_preview=True, preview=True,
|
557 |
+
columns=2, height="auto", object_fit="contain", # Adjusted for better display
|
558 |
+
elem_id="gallery"
|
559 |
+
)
|
560 |
+
with gr.Group():
|
561 |
+
custom_lora_input_path = gr.Textbox(label="Add Custom LoRA from Hugging Face", info="Path like 'username/repo-name'", placeholder="e.g., multimodalart/flux-lora-example (but for LTX!)")
|
562 |
+
gr.Markdown("[Find LTX-compatible LoRAs on Hugging Face](https://huggingface.co/models?other=base_model:Lightricks/LTX-Video-0.9.7-distilled&sort=trending)", elem_id="lora_list_link")
|
563 |
+
|
564 |
+
custom_lora_status_html = gr.HTML(visible=False) # For displaying custom LoRA card
|
565 |
+
remove_custom_lora_button = gr.Button("Remove Last Added Custom LoRA", visible=False)
|
566 |
+
|
567 |
+
output_video = gr.Video(label="Generated Video", interactive=False)
|
568 |
+
# output_seed_info = gr.Textbox(label="Seed Used", interactive=False) # Add this to show seed
|
569 |
gr.DeepLinkButton()
|
570 |
|
571 |
+
with gr.Accordion("Advanced settings", open=False):
|
572 |
+
mode_dropdown = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="Task Mode", value="image-to-video", visible=False) # Keep internal
|
573 |
+
negative_prompt_textbox = gr.Textbox(label="Negative Prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
|
574 |
+
with gr.Row():
|
575 |
+
seed_number_input = gr.Number(label="Seed", value=0, precision=0)
|
576 |
+
randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
|
577 |
+
with gr.Row():
|
578 |
+
guidance_scale_slider = gr.Slider(label="Guidance Scale (CFG)", minimum=0, maximum=10, value=1.0, step=0.1) # LTX uses low CFG
|
579 |
+
steps_slider = gr.Slider(label="Inference Steps (Main Pass)", minimum=1, maximum=30, value=7, step=1) # Default steps for LTX
|
580 |
+
# num_frames_slider = gr.Slider(label="# Frames (Debug - Overridden by Duration)", minimum=9, maximum=MAX_NUM_FRAMES, value=96, step=8, visible=False) # Hidden, as duration controls it
|
581 |
+
with gr.Row():
|
582 |
+
height_slider = gr.Slider(label="Target Height", value=512, step=pipe.vae_spatial_compression_ratio, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info=f"Must be divisible by {pipe.vae_spatial_compression_ratio}.")
|
583 |
+
width_slider = gr.Slider(label="Target Width", value=704, step=pipe.vae_spatial_compression_ratio, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info=f"Must be divisible by {pipe.vae_spatial_compression_ratio}.")
|
584 |
+
with gr.Row():
|
585 |
+
lora_scale_slider = gr.Slider(label="LoRA Scale", minimum=0.0, maximum=2.0, step=0.05, value=0.8, info="Adjusts the influence of the selected LoRA.")
|
586 |
+
|
587 |
+
|
588 |
+
# --- Event Handlers ---
|
589 |
+
image_i2v.upload(fn=handle_image_upload_for_dims, inputs=[image_i2v, height_slider, width_slider], outputs=[height_slider, width_slider])
|
590 |
+
video_v2v.upload(fn=handle_video_upload_for_dims, inputs=[video_v2v, height_slider, width_slider], outputs=[height_slider, width_slider])
|
591 |
+
video_v2v.clear(lambda cur_h, cur_w: (gr.update(value=cur_h), gr.update(value=cur_w)), inputs=[height_slider, width_slider], outputs=[height_slider, width_slider])
|
592 |
+
image_i2v.clear(lambda cur_h, cur_w: (gr.update(value=cur_h), gr.update(value=cur_w)), inputs=[height_slider, width_slider], outputs=[height_slider, width_slider])
|
593 |
+
|
594 |
+
|
595 |
+
image_tab.select(fn=update_task_image, outputs=[mode_dropdown])
|
596 |
+
text_tab.select(fn=update_task_text, outputs=[mode_dropdown])
|
597 |
+
video_tab.select(fn=update_task_video, outputs=[mode_dropdown])
|
598 |
+
|
599 |
+
# LoRA Gallery Callbacks
|
600 |
+
lora_gallery_display.select(
|
601 |
+
update_lora_selection,
|
602 |
+
outputs=[selected_lora_info_markdown, selected_lora_index_state]
|
603 |
)
|
604 |
+
custom_lora_input_path.submit(
|
605 |
+
add_custom_lora_for_ltx,
|
606 |
+
inputs=[custom_lora_input_path],
|
607 |
+
outputs=[custom_lora_status_html, remove_custom_lora_button, lora_gallery_display, selected_lora_info_markdown, selected_lora_index_state, custom_lora_input_path]
|
608 |
)
|
609 |
+
remove_custom_lora_button.click(
|
610 |
+
remove_custom_lora_for_ltx,
|
611 |
+
outputs=[custom_lora_status_html, remove_custom_lora_button, lora_gallery_display, selected_lora_info_markdown, selected_lora_index_state, custom_lora_input_path]
|
612 |
)
|
613 |
|
614 |
+
+    # Consolidate inputs for generate(); order must match the function signature
+    # (prompt, negative_prompt, image, video, height, width, mode, steps, ...),
+    # since Gradio passes inputs positionally. The prompt, negative prompt, image and
+    # video components are passed explicitly by each button below.
+    gen_inputs = [
+        height_slider, width_slider, mode_dropdown, steps_slider,
+        gr.Number(value=96, visible=False),  # placeholder for num_frames_slider_val, as it's controlled by duration
+        frames_to_use_slider,
+        seed_number_input, randomize_seed_checkbox, guidance_scale_slider, duration_slider, improve_texture_checkbox,
+        selected_lora_index_state, lora_scale_slider
+    ]
+
+    t2v_button.click(fn=generate,
+                     inputs=[t2v_prompt, negative_prompt_textbox, image_n_hidden, video_n_hidden] + gen_inputs,
+                     outputs=[output_video, seed_number_input])  # seed_number_input added to outputs to show the seed used
+
+    i2v_button.click(fn=generate,
+                     inputs=[i2v_prompt, negative_prompt_textbox, image_i2v, video_i_hidden] + gen_inputs,
+                     outputs=[output_video, seed_number_input])
+
+    v2v_button.click(fn=generate,
+                     inputs=[v2v_prompt, negative_prompt_textbox, image_v_hidden, video_v2v] + gen_inputs,
+                     outputs=[output_video, seed_number_input])
|
636 |
+
|
637 |
+
demo.queue(max_size=10).launch()
|