linoyts (HF Staff) committed
Commit ab50362 · verified · 1 Parent(s): c10a255

add lora gallery

Files changed (1)
  1. app.py +498 -256
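Note: the updated app.py loads its gallery entries from a loras.json file placed next to the app (see the json.load block at the top of the new file). Below is a minimal sketch of that manifest, written as Python so the keys match what the code reads (title, repo, weights, trigger_word, image); the repo, weights and image values are placeholders, not a published LoRA.

# Sketch: write a minimal loras.json with the fields app.py expects.
# The values below are placeholders; point them at a real LTX-compatible LoRA repo.
import json

example_loras = [
    {
        "title": "My LTX LoRA",            # name shown in the gallery
        "repo": "user/repo",               # Hugging Face repo id
        "weights": "lora.safetensors",     # safetensors file inside the repo
        "trigger_word": "my style",        # added to the prompt when the LoRA is selected
        "image": "url_to_image.jpg",       # preview thumbnail for the gallery card
    }
]

with open("loras.json", "w") as f:
    json.dump(example_loras, f, indent=2)

Entries added at runtime through the "Add Custom LoRA" textbox use the same keys, plus "custom": True.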
app.py CHANGED
@@ -1,38 +1,269 @@
 
1
  import gradio as gr
2
- import spaces
 
3
  import torch
 
 
4
  from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
5
  from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
6
- from diffusers.utils import export_to_video, load_video, load_image
 
 
7
  import random
8
  import numpy as np
9
  import imageio
10
-
11
-
12
- pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.bfloat16)
13
- pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("a-r-r-o-w/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=torch.bfloat16)
14
- pipe.to("cuda")
15
- pipe_upsample.to("cuda")
 
16
  pipe.vae.enable_tiling()
17
 
18
- MAX_SEED = np.iinfo(np.int32).max
 
 
19
  MAX_IMAGE_SIZE = 1280
20
  MAX_NUM_FRAMES = 257
21
- FPS = 30.0
22
- # --- Helper function for dimension calculation ---
23
- MIN_DIM_SLIDER = 256 # As defined in the sliders minimum attribute
24
- TARGET_FIXED_SIDE = 768 # Desired fixed side length as per requirement
 
25
 
26
  def round_to_nearest_resolution_acceptable_by_vae(height, width):
27
- print("before rounding",height, width)
28
  height = height - (height % pipe.vae_spatial_compression_ratio)
29
  width = width - (width % pipe.vae_spatial_compression_ratio)
30
- print("after rounding",height, width)
31
  return height, width
 
33
  def handle_image_upload_for_dims(image_filepath, current_h, current_w):
34
- if not image_filepath: # Image cleared or no image initially
35
- # Keep current slider values if image is cleared or no input
36
  return gr.update(value=current_h), gr.update(value=current_w)
37
  try:
38
  img = Image.open(image_filepath)
@@ -41,15 +272,13 @@ def handle_image_upload_for_dims(image_filepath, current_h, current_w):
41
  return gr.update(value=new_h), gr.update(value=new_w)
42
  except Exception as e:
43
  print(f"Error processing image for dimension update: {e}")
44
- # Keep current slider values on error
45
  return gr.update(value=current_h), gr.update(value=current_w)
46
 
47
  def handle_video_upload_for_dims(video_filepath, current_h, current_w):
48
- if not video_filepath: # Video cleared or no video initially
49
  return gr.update(value=current_h), gr.update(value=current_w)
50
  try:
51
- # Ensure video_filepath is a string for os.path.exists and imageio
52
- video_filepath_str = str(video_filepath)
53
  if not os.path.exists(video_filepath_str):
54
  print(f"Video file path does not exist for dimension update: {video_filepath_str}")
55
  return gr.update(value=current_h), gr.update(value=current_w)
@@ -60,56 +289,38 @@ def handle_video_upload_for_dims(video_filepath, current_h, current_w):
60
  if 'size' in meta:
61
  orig_w, orig_h = meta['size']
62
  else:
63
- # Fallback: read first frame if 'size' not in metadata
64
  try:
65
  first_frame = reader.get_data(0)
66
- # Shape is (h, w, c) for frames
67
  orig_h, orig_w = first_frame.shape[0], first_frame.shape[1]
68
  except Exception as e_frame:
69
  print(f"Could not get video size from metadata or first frame: {e_frame}")
70
  return gr.update(value=current_h), gr.update(value=current_w)
71
 
72
- if orig_w == -1 or orig_h == -1: # If dimensions couldn't be determined
73
  print(f"Could not determine dimensions for video: {video_filepath_str}")
74
  return gr.update(value=current_h), gr.update(value=current_w)
75
 
76
  new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
77
  return gr.update(value=new_h), gr.update(value=new_w)
78
  except Exception as e:
79
- # Log type of video_filepath for debugging if it's not a path-like string
80
  print(f"Error processing video for dimension update: {e} (Path: {video_filepath}, Type: {type(video_filepath)})")
81
  return gr.update(value=current_h), gr.update(value=current_w)
82
 
83
- def update_task_image():
84
- return "image-to-video"
 
85
 
86
- def update_task_text():
87
- return "text-to-video"
88
-
89
- def update_task_video():
90
- return "video-to-video"
91
-
92
- def get_duration(prompt,
93
- negative_prompt,
94
- image,
95
- video,
96
- height,
97
- width,
98
- mode,
99
- steps,
100
- num_frames,
101
- frames_to_use,
102
- seed,
103
- randomize_seed,
104
- guidance_scale,
105
- duration_input,
106
- improve_texture, progress):
107
  if duration_input > 7:
108
  return 75
109
  else:
110
  return 60
111
-
112
- @spaces.GPU(duration=get_duration)
113
  def generate(prompt,
114
  negative_prompt,
115
  image,
@@ -118,164 +329,202 @@ def generate(prompt,
118
  width,
119
  mode,
120
  steps,
121
- num_frames,
122
  frames_to_use,
123
  seed,
124
  randomize_seed,
125
  guidance_scale,
126
  duration_input,
127
- improve_texture=False, progress=gr.Progress(track_tqdm=True)):
 
129
  if randomize_seed:
130
  seed = random.randint(0, MAX_SEED)
131
 
132
- # calculate number of frames based on the duration input in seconds
133
  target_frames_ideal = duration_input * FPS
134
  target_frames_rounded = round(target_frames_ideal)
135
- if target_frames_rounded < 1:
136
- target_frames_rounded = 1
137
 
138
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
139
  actual_num_frames = int(n_val * 8 + 1)
140
-
141
  actual_num_frames = max(9, actual_num_frames)
142
- num_frames = min(MAX_NUM_FRAMES, actual_num_frames)
143
 
144
  if mode == "video-to-video" and (video is not None):
145
- video = load_video(video)[:frames_to_use]
146
- condition = True
147
- width, height = video[0].size
148
- steps = 4
149
- # elif mode == "image-to-video" and (image is not None):
150
- elif mode == "image-to-video":
151
- video = [load_image(image)]
152
- width, height = video[0].size
153
- condition = True
154
- else:
155
- condition=False
156
-
157
- if condition:
158
- condition1 = LTXVideoCondition(video=video, frame_index=0)
159
  else:
160
  condition1 = None
161
 
162
- # Part 1. Generate video at smaller resolution
163
- # Text-only conditioning is also supported without the need to pass `conditions`
164
- expected_height, expected_width = height, width
165
  downscale_factor = 2 / 3
166
  downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
167
  downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
168
-
169
  timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
170
  timesteps_second_pass = [1000, 909, 725, 421]
171
- if steps == 8:
172
  timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
173
  timesteps_second_pass = [1000, 909, 725, 421, 0]
174
- elif 7 < steps < 8:
175
- timesteps_first_pass = None
176
  timesteps_second_pass = None
177
 
178
- latents = pipe(
179
- conditions=condition1,
180
- prompt=prompt,
181
- negative_prompt=negative_prompt,
182
- width=downscaled_width,
183
- height=downscaled_height,
184
- num_frames=num_frames,
185
- num_inference_steps=steps,
186
- decode_timestep = 0.05,
187
- decode_noise_scale = 0.025,
188
- timesteps = timesteps_first_pass, #[1.0000, 0.9937, 0.9875, 0.9812, 0.9750, 0.9094, 0.7250]
189
- image_cond_noise_scale=0.0, # testing
190
- guidance_rescale=0.7,# testing
191
- guidance_scale=guidance_scale,
192
- generator=torch.Generator(device="cuda").manual_seed(seed),
193
- output_type="latent",
194
- ).frames
195
-
196
-
197
- # Part 2. Upscale generated video using latent upsampler with fewer inference steps
198
- # The available latent upsampler upscales the height/width by 2x
199
- if improve_texture:
200
- upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
201
- upscaled_latents = pipe_upsample(
202
- latents=latents,
203
- adain_factor=1.0,
204
- output_type="latent"
205
- ).frames
206
-
207
- # Part 3. Denoise the upscaled video with few steps to improve texture (optional, but recommended)
208
- video = pipe(
209
  conditions=condition1,
210
- prompt=prompt,
211
  negative_prompt=negative_prompt,
212
- width=upscaled_width,
213
- height=upscaled_height,
214
  num_frames=num_frames,
215
- guidance_scale=guidance_scale,
216
- denoise_strength=0.999, # Effectively, 3 inference steps out of 4
217
- timesteps= timesteps_second_pass,
218
- num_inference_steps=10,
219
- latents=upscaled_latents,
220
  decode_timestep=0.05,
221
  decode_noise_scale=0.025,
 
222
  image_cond_noise_scale=0.0,
223
  guidance_rescale=0.7,
224
- generator=torch.Generator().manual_seed(seed),
225
- output_type="np",
226
- ).frames[0]
227
- else:
228
- upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
229
- video = pipe_upsample(
230
- latents=latents,
231
- output_type="np"
232
- ).frames[0]
 
 
 
233
 
234
- # Part 4. Downscale the video to the expected resolution
235
- # video = [frame.resize((expected_width, expected_height)) for frame in video]
236
- # export_to_video(video, "output.mp4", fps=FPS, quality=8)
237
-
238
- video_frames = [(frame * 255).astype(np.uint8) for frame in video]
239
- with imageio.get_writer(
240
- "output.mp4", fps=FPS, quality=8, bitrate=None, macro_block_size=1
241
- ) as writer:
242
- for frame_idx in range(len(video_frames)):
243
- progress(frame_idx / len(video_frames), desc="Saving video (fallback ffmpeg)")
244
- writer.append_data(video_frames[frame_idx])
245
 
246
- return "output.mp4"
247
-
248
-
249
 
 
250
  css="""
251
- #col-container {
252
- margin: 0 auto;
253
- max-width: 900px;
254
- }
255
- """
256
-
257
- js_func = """
258
- function refresh() {
259
- const url = new URL(window.location);
260
-
261
- if (url.searchParams.get('__theme') !== 'dark') {
262
- url.searchParams.set('__theme', 'dark');
263
- window.location.href = url.href;
264
- }
265
- }
266
  """
267
 
268
  with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
269
-
270
- gr.Markdown("# LTX Video 0.9.7 Distilled")
271
- gr.Markdown("Fast high quality video generation. [Model](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltxv-13b-0.9.7-distilled.safetensors) [GitHub](https://github.com/Lightricks/LTX-Video) [Diffusers](#)")
272
 
273
- with gr.Row():
274
- with gr.Column():
 
 
275
  with gr.Tab("image-to-video") as image_tab:
276
  with gr.Group():
277
  video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
278
- image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
279
  i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
280
  i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
281
  with gr.Tab("text-to-video") as text_tab:
@@ -288,108 +537,101 @@ with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
288
  with gr.Group():
289
  image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
290
  video_v2v = gr.Video(label="Input Video")
291
- frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=MAX_NUM_FRAMES, value=9, step=8, info="Number of initial frames to use for conditioning/transformation. Must be N*8+1.")
292
  v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
293
  v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
294
 
295
- duration_input = gr.Slider(
296
- label="Video Duration (seconds)",
297
- minimum=0.3,
298
- maximum=8.5,
299
- value=2,
300
- step=0.1,
301
- info=f"Target video duration (0.3s to 8.5s)"
302
  )
303
- improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, info="Uses a two-pass generation for better quality, but is slower. Recommended for final output.")
304
-
305
- with gr.Column():
306
- output = gr.Video(label="Generated Video", interactive=False)
 
307
  gr.DeepLinkButton()
308
-
309
-
310
- with gr.Accordion("Advanced settings", open=False):
311
- mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
312
- negative_prompt = gr.Textbox(label="negative prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
313
- with gr.Row():
314
- seed = gr.Number(label="seed", value=0, precision=0)
315
- randomize_seed = gr.Checkbox(label="randomize seed")
316
- with gr.Row():
317
- guidance_scale= gr.Slider(label="guidance scale", minimum=0, maximum=10, value=1, step=1)
318
- steps = gr.Slider(label="Steps", minimum=1, maximum=30, value=7, step=1)
319
- num_frames = gr.Slider(label="# frames", minimum=1, maximum=161, value=96, step=1, visible=False)
320
- with gr.Row():
321
- height = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
322
- width = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
323
-
324
-
325
- image_i2v.upload(
326
- fn=handle_image_upload_for_dims,
327
- inputs=[image_i2v, height, width],
328
- outputs=[height, width]
329
- )
330
- video_v2v.upload(
331
- fn=handle_video_upload_for_dims,
332
- inputs=[video_v2v, height, width],
333
- outputs=[height, width]
334
- )
335
 
336
- image_tab.select(
337
- fn=update_task_image,
338
- outputs=[mode]
 
339
  )
340
- text_tab.select(
341
- fn=update_task_text,
342
- outputs=[mode]
 
343
  )
344
- video_tab.select(
345
- fn=update_task_video,
346
- outputs=[mode]
347
  )
348
 
349
- t2v_button.click(fn=generate,
350
- inputs=[t2v_prompt,
351
- negative_prompt,
352
- image_n_hidden,
353
- video_n_hidden,
354
- height,
355
- width,
356
- mode,
357
- steps,
358
- num_frames,
359
- frames_to_use,
360
- seed,
361
- randomize_seed,guidance_scale, duration_input, improve_texture],
362
- outputs=[output])
363
-
364
- i2v_button.click(fn=generate,
365
- inputs=[i2v_prompt,
366
- negative_prompt,
367
- image_i2v,
368
- video_i_hidden,
369
- height,
370
- width,
371
- mode,
372
- steps,
373
- num_frames,
374
- frames_to_use,
375
- seed,
376
- randomize_seed,guidance_scale, duration_input, improve_texture],
377
- outputs=[output])
378
-
379
- v2v_button.click(fn=generate,
380
- inputs=[v2v_prompt,
381
- negative_prompt,
382
- image_v_hidden,
383
- video_v2v,
384
- height,
385
- width,
386
- mode,
387
- steps,
388
- num_frames,
389
- frames_to_use,
390
- seed,
391
- randomize_seed,guidance_scale, duration_input, improve_texture],
392
- outputs=[output])
393
-
394
-
395
- demo.launch()
 
1
+ import os
2
  import gradio as gr
3
+ import json
4
+ import logging # Not strictly used from app (2) but good practice
5
  import torch
6
+ from PIL import Image
7
+ import spaces
8
  from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
9
  from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
10
+ from diffusers.utils import export_to_video, load_video, load_image # load_image was also in app (2)
11
+ from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download
12
+ import copy # Not strictly used from app (2) but kept if needed later
13
  import random
14
  import numpy as np
15
  import imageio
16
+ import time
17
+ import re
18
+
19
+ # --- LoRA related: Load LoRAs from JSON file ---
20
+ try:
21
+ with open('loras.json', 'r') as f:
22
+ loras = json.load(f)
23
+ except FileNotFoundError:
24
+ print("WARNING: loras.json not found. LoRA gallery will be empty or non-functional.")
25
+ print("Please create loras.json with entries like: [{'title': 'My LTX LoRA', 'repo': 'user/repo', 'weights': 'lora.safetensors', 'trigger_word': 'my style', 'image': 'url_to_image.jpg'}]")
26
+ loras = []
27
+ except json.JSONDecodeError:
28
+ print("WARNING: loras.json is not valid JSON. LoRA gallery will be empty or non-functional.")
29
+ loras = []
30
+
31
+
32
+ # Initialize the base model
33
+ dtype = torch.bfloat16 # Assuming LTX uses bfloat16 as per original app (1)
34
+ device = "cuda" if torch.cuda.is_available() else "cpu"
35
+
36
+ # --- Original app (1) pipeline setup ---
37
+ pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=dtype)
38
+ pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-Latent-Spatial-Upsampler-diffusers", vae=pipe.vae, torch_dtype=dtype)
39
+ pipe.to(device)
40
+ pipe_upsample.to(device)
41
  pipe.vae.enable_tiling()
42
 
43
+ MAX_SEED = np.iinfo(np.int32).max # from app (1)
44
+ # MAX_SEED_APP2 = 2**32-1 # from app (2), slightly different, stick to app (1)'s for consistency here.
45
+
46
  MAX_IMAGE_SIZE = 1280
47
  MAX_NUM_FRAMES = 257
48
+ FPS = 30.0
49
+ MIN_DIM_SLIDER = 256
50
+ TARGET_FIXED_SIDE = 768
51
+
52
+
53
+ class calculateDuration:
54
+ def __init__(self, activity_name=""):
55
+ self.activity_name = activity_name
56
+
57
+ def __enter__(self):
58
+ self.start_time = time.time()
59
+ return self
60
+
61
+ def __exit__(self, exc_type, exc_value, traceback):
62
+ self.end_time = time.time()
63
+ self.elapsed_time = self.end_time - self.start_time
64
+ if self.activity_name:
65
+ print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
66
+ else:
67
+ print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
68
+
69
+
70
+ def update_lora_selection(evt: gr.SelectData):
71
+ if not loras or evt.index is None or evt.index >= len(loras):
72
+ return gr.update(), None # No update to markdown, no selected index
73
+ selected_lora_item = loras[evt.index]
74
+ # new_placeholder = f"Type a prompt for {selected_lora_item['title']}" # Not updating placeholders directly
75
+ lora_repo = selected_lora_item["repo"]
76
+ updated_text = f"### Selected LoRA: [{selected_lora_item['title']}](https://huggingface.co/{lora_repo}) ✨"
77
+ if selected_lora_item.get('trigger_word'):
78
+ updated_text += f"\nTrigger word: `{selected_lora_item['trigger_word']}`"
79
+ # No width/height adjustment to avoid conflict with app (1)'s logic
80
+ return (
81
+ # gr.update(placeholder=new_placeholder), # Not changing prompt placeholder
82
+ updated_text,
83
+ evt.index,
84
+ )
85
+
86
+ def get_huggingface_safetensors_for_ltx(link): # Renamed for clarity
87
+ split_link = link.split("/")
88
+ if len(split_link) != 2:
89
+ raise Exception("Invalid Hugging Face repository link format. Should be 'username/repository_name'.")
90
+
91
+ print(f"Repository attempted: {link}") # Use the combined link
92
+
93
+ model_card = ModelCard.load(link) # link is "username/repository_name"
94
+ base_model = model_card.data.get("base_model")
95
+ print(f"Base model from card: {base_model}")
96
+
97
+ # Validate model type for LTX
98
+ acceptable_models = {"Lightricks/LTX-Video-0.9.7-distilled"} # Key line for LTX compatibility
99
+
100
+ models_to_check = base_model if isinstance(base_model, list) else [base_model]
101
+
102
+ if not any(str(model).strip() in acceptable_models for model in models_to_check): # Ensure string comparison
103
+ raise Exception(f"Not a LoRA for a compatible LTX base model! Expected one of {acceptable_models}, found {models_to_check}")
104
+
105
+ image_path = None
106
+ if model_card.data.get("widget") and isinstance(model_card.data["widget"], list) and len(model_card.data["widget"]) > 0:
107
+ image_path = model_card.data["widget"][0].get("output", {}).get("url", None)
108
+
109
+ trigger_word = model_card.data.get("instance_prompt", "")
110
+ image_url = f"https://huggingface.co/{link}/resolve/main/{image_path}" if image_path else None
111
+
112
+ fs = HfFileSystem()
113
+ try:
114
+ list_of_files = fs.ls(link, detail=False)
115
+ safetensors_name = None
116
+ # Simplified logic: find first .safetensors, or prioritize specific names if needed
117
+ # For LoRAs, usually there's one main .safetensors file.
118
+ # The complex step-based selection from app(2) might be overkill unless LTX LoRAs follow that pattern.
119
+
120
+ # Prioritize files common for LoRAs
121
+ common_lora_filenames = ["lora.safetensors", "pytorch_lora_weights.safetensors"]
122
+ for f_common in common_lora_filenames:
123
+ if f"{link}/{f_common}" in list_of_files:
124
+ safetensors_name = f_common
125
+ break
126
+
127
+ if not safetensors_name: # Fallback to first .safetensors
128
+ for file_path in list_of_files:
129
+ filename = file_path.split("/")[-1]
130
+ if filename.endswith(".safetensors"):
131
+ safetensors_name = filename
132
+ break
133
+
134
+ if not safetensors_name: # If still not found, then raise error
135
+ raise Exception("No valid *.safetensors file found in the repository.")
136
+
137
+ if not image_url: # Fallback image search
138
+ for file_path in list_of_files:
139
+ filename = file_path.split("/")[-1]
140
+ if filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
141
+ image_url = f"https://huggingface.co/{link}/resolve/main/{filename}"
142
+ break
143
+
144
+ except Exception as e:
145
+ print(f"Error accessing repository or finding safetensors: {e}")
146
+ raise Exception(f"Could not validate Hugging Face repository '{link}' or find a .safetensors LoRA file.") from e
147
+
148
+ # split_link[0] is user, split_link[1] is repo_name
149
+ return split_link[1], link, safetensors_name, trigger_word, image_url
150
+
151
+
152
+ def check_custom_model_for_ltx(link_input): # Renamed for clarity
153
+ print(f"Checking a custom model on: {link_input}")
154
+ if not link_input or not isinstance(link_input, str):
155
+ raise Exception("Invalid custom LoRA input. Please provide a Hugging Face repository path (e.g., 'username/repo-name') or URL.")
156
+
157
+ link_to_check = link_input.strip()
158
+ if link_to_check.startswith("https://huggingface.co/"):
159
+ link_to_check = link_to_check.replace("https://huggingface.co/", "").split("?")[0] # Remove base URL and query params
160
+ elif link_to_check.startswith("www.huggingface.co/"):
161
+ link_to_check = link_to_check.replace("www.huggingface.co/", "").split("?")[0]
162
+
163
+ # Basic check for 'user/repo' format
164
+ if '/' not in link_to_check or len(link_to_check.split('/')) != 2:
165
+ raise Exception("Invalid Hugging Face repository path. Use 'username/repo-name' format.")
166
+
167
+ return get_huggingface_safetensors_for_ltx(link_to_check)
168
+
169
+ def add_custom_lora_for_ltx(custom_lora_path_input): # Renamed for clarity
170
+ global loras # To modify the global loras list
171
+ if custom_lora_path_input:
172
+ try:
173
+ title, repo_id, weights_filename, trigger_word, image_url = check_custom_model_for_ltx(custom_lora_path_input)
174
+ print(f"Loaded custom LoRA: {repo_id}")
175
+
176
+ # Create HTML card for display
177
+ card_html = f'''
178
+ <div class="custom_lora_card">
179
+ <span>Loaded custom LoRA:</span>
180
+ <div class="card_internal">
181
+ <img src="{image_url if image_url else 'https://huggingface.co/front/assets/huggingface_logo-noborder.svg'}" alt="{title}" style="width:80px; height:80px; object-fit:cover;" />
182
+ <div>
183
+ <h4>{title}</h4>
184
+ <small>Repo: {repo_id}<br>Weights: {weights_filename}<br>
185
+ {"Trigger: <code><b>"+trigger_word+"</b></code>" if trigger_word else "No trigger word found. If one is needed, include it in your prompt."}
186
+ </small>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ '''
191
+
192
+ # Check if this LoRA (by repo_id) already exists
193
+ existing_item_index = next((index for (index, item) in enumerate(loras) if item['repo'] == repo_id), None)
194
+
195
+ new_item_data = {
196
+ "image": image_url,
197
+ "title": title,
198
+ "repo": repo_id,
199
+ "weights": weights_filename,
200
+ "trigger_word": trigger_word,
201
+ "custom": True # Mark as custom
202
+ }
203
+
204
+ if existing_item_index is not None:
205
+ loras[existing_item_index] = new_item_data # Update existing
206
+ else:
207
+ loras.append(new_item_data)
208
+ existing_item_index = len(loras) - 1
209
+
210
+ # Update gallery choices
211
+ gallery_choices = [(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras]
212
+
213
+ return (
214
+ gr.update(visible=True, value=card_html),
215
+ gr.update(visible=True), # Show remove button
216
+ gr.update(value=gallery_choices), # Update gallery items (gr.Gallery takes value, not choices); selection resets
217
+ f"Custom LoRA '{title}' added. Select it from the gallery.", # Selected info text
218
+ None, # Reset selected_index state
219
+ "" # Clear custom LoRA input textbox
220
+ )
221
+
222
+ except Exception as e:
223
+ gr.Warning(f"Invalid Custom LoRA: {e}")
224
+ return gr.update(visible=True, value=f"<p style='color:red;'>Error adding LoRA: {e}</p>"), gr.update(visible=False), gr.update(), "", None, custom_lora_path_input
225
+ else: # No input
226
+ return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
227
+
228
+ def remove_custom_lora_for_ltx(): # Renamed for clarity
229
+ global loras
230
+ # Remove the last added custom LoRA if it's marked (simplistic: assumes one custom at a time or last one)
231
+ # A more robust way would be to track the index of the custom LoRA being displayed.
232
+ # For now, let's find the *last* custom LoRA and remove it.
233
+ custom_lora_indices = [i for i, item in enumerate(loras) if item.get("custom")]
234
+ if custom_lora_indices:
235
+ loras.pop(custom_lora_indices[-1]) # Remove the last one marked as custom
236
+
237
+ gallery_choices = [(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras]
238
+ return gr.update(visible=False, value=""), gr.update(visible=False), gr.update(value=gallery_choices), "", None, ""
239
+
240
 
241
  def round_to_nearest_resolution_acceptable_by_vae(height, width):
 
242
  height = height - (height % pipe.vae_spatial_compression_ratio)
243
  width = width - (width % pipe.vae_spatial_compression_ratio)
 
244
  return height, width
245
 
246
+ def calculate_new_dimensions(orig_w, orig_h):
247
+ """Calculates new dimensions maintaining aspect ratio with one side fixed to TARGET_FIXED_SIDE."""
248
+ if orig_w == 0 or orig_h == 0: return MIN_DIM_SLIDER, MIN_DIM_SLIDER # Avoid division by zero
249
+
250
+ if orig_w > orig_h: # Landscape or square
251
+ new_w = TARGET_FIXED_SIDE
252
+ new_h = int(TARGET_FIXED_SIDE * orig_h / orig_w)
253
+ else: # Portrait
254
+ new_h = TARGET_FIXED_SIDE
255
+ new_w = int(TARGET_FIXED_SIDE * orig_w / orig_h)
256
+
257
+ # Ensure dimensions are at least MIN_DIM_SLIDER
258
+ new_w = max(MIN_DIM_SLIDER, new_w)
259
+ new_h = max(MIN_DIM_SLIDER, new_h)
260
+
261
+ # Ensure divisibility by VAE compression ratio (e.g., 32)
262
+ new_h, new_w = round_to_nearest_resolution_acceptable_by_vae(new_h, new_w)
263
+ return new_h, new_w
264
+
265
  def handle_image_upload_for_dims(image_filepath, current_h, current_w):
266
+ if not image_filepath:
 
267
  return gr.update(value=current_h), gr.update(value=current_w)
268
  try:
269
  img = Image.open(image_filepath)
 
272
  return gr.update(value=new_h), gr.update(value=new_w)
273
  except Exception as e:
274
  print(f"Error processing image for dimension update: {e}")
 
275
  return gr.update(value=current_h), gr.update(value=current_w)
276
 
277
  def handle_video_upload_for_dims(video_filepath, current_h, current_w):
278
+ if not video_filepath:
279
  return gr.update(value=current_h), gr.update(value=current_w)
280
  try:
281
+ video_filepath_str = str(video_filepath)
 
282
  if not os.path.exists(video_filepath_str):
283
  print(f"Video file path does not exist for dimension update: {video_filepath_str}")
284
  return gr.update(value=current_h), gr.update(value=current_w)
 
289
  if 'size' in meta:
290
  orig_w, orig_h = meta['size']
291
  else:
 
292
  try:
293
  first_frame = reader.get_data(0)
 
294
  orig_h, orig_w = first_frame.shape[0], first_frame.shape[1]
295
  except Exception as e_frame:
296
  print(f"Could not get video size from metadata or first frame: {e_frame}")
297
  return gr.update(value=current_h), gr.update(value=current_w)
298
 
299
+ if orig_w == -1 or orig_h == -1:
300
  print(f"Could not determine dimensions for video: {video_filepath_str}")
301
  return gr.update(value=current_h), gr.update(value=current_w)
302
 
303
  new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
304
  return gr.update(value=new_h), gr.update(value=new_w)
305
  except Exception as e:
 
306
  print(f"Error processing video for dimension update: {e} (Path: {video_filepath}, Type: {type(video_filepath)})")
307
  return gr.update(value=current_h), gr.update(value=current_w)
308
 
309
+ def update_task_image(): return "image-to-video"
310
+ def update_task_text(): return "text-to-video"
311
+ def update_task_video(): return "video-to-video"
312
 
313
+ def get_duration(prompt, negative_prompt, image, video, height, width, mode, steps, num_frames,
314
+ frames_to_use, seed, randomize_seed, guidance_scale, duration_input, improve_texture,
315
+ # New LoRA params
316
+ selected_lora_index, lora_scale_value,
317
+ progress): # Add selected_lora_index and lora_scale_value if they affect duration
 
318
  if duration_input > 7:
319
  return 75
320
  else:
321
  return 60
322
+
323
+ @spaces.GPU(duration=get_duration) # Needs selected_lora_index and lora_scale_value if get_duration uses them
324
  def generate(prompt,
325
  negative_prompt,
326
  image,
 
329
  width,
330
  mode,
331
  steps,
332
+ num_frames_slider_val, # Renamed to avoid conflict with internal num_frames
333
  frames_to_use,
334
  seed,
335
  randomize_seed,
336
  guidance_scale,
337
  duration_input,
338
+ improve_texture=False,
339
+ # New LoRA params
340
+ selected_lora_index=None,
341
+ lora_scale_value=0.8, # Default LoRA scale
342
+ progress=gr.Progress(track_tqdm=True)):
343
+
344
+ effective_prompt = prompt
345
 
346
+ # --- LoRA Handling ---
347
+ # Unload any existing LoRAs from main pipes first to prevent conflicts
348
+ # This should ideally be more granular if LoRAs are very large or loading is slow.
349
+ with calculateDuration("Unloading previous LoRAs"):
350
+ try:
351
+ pipe.unload_lora_weights()
352
+ print("Previous LoRAs unloaded if any.")
353
+ except Exception as e:
354
+ print(f"Note: Could not unload LoRAs (maybe none were loaded): {e}")
355
+
356
+ if selected_lora_index is not None and 0 <= selected_lora_index < len(loras):
357
+ selected_lora_data = loras[selected_lora_index]
358
+ lora_repo_id = selected_lora_data["repo"]
359
+ lora_weights_name = selected_lora_data.get("weights", None)
360
+ lora_trigger = selected_lora_data.get("trigger_word", "")
361
+
362
+ print(f"Selected LoRA: {selected_lora_data['title']} from {lora_repo_id}")
363
+ if lora_trigger:
364
+ print(f"Applying trigger word: {lora_trigger}")
365
+
366
+ if selected_lora_data.get("trigger_position") == "prepend":
367
+ effective_prompt = f"{lora_trigger} {prompt}"
368
+ else: # Default to append or if not specified
369
+ effective_prompt = f"{prompt} {lora_trigger}"
370
+
371
+ with calculateDuration(f"Loading LoRA weights for {selected_lora_data['title']}"):
372
+ try:
373
+ # Load into main generation pipe
374
+ pipe.load_lora_weights(
375
+ lora_repo_id,
376
+ weight_name=lora_weights_name,
377
+ adapter_name="active_lora" # Use a consistent adapter name
378
+ )
379
+ pipe.set_adapters(["active_lora"], adapter_weights=[lora_scale_value])
380
+ print(f"LoRA loaded into main pipe with scale {lora_scale_value}")
381
+
382
+ except Exception as e:
383
+ gr.Warning(f"Failed to load LoRA '{selected_lora_data['title']}': {e}. Proceeding without LoRA.")
384
+ print(f"Error loading LoRA: {e}")
385
+ # Ensure pipes are clean if loading failed mid-way (though unload_lora_weights should handle this)
386
+ try:
387
+ pipe.unload_lora_weights()
388
+ except: pass # Ignore errors here
389
+ else:
390
+ print("No LoRA selected or invalid index.")
391
+ # --- End LoRA Handling ---
392
+
393
  if randomize_seed:
394
  seed = random.randint(0, MAX_SEED)
395
 
 
396
  target_frames_ideal = duration_input * FPS
397
  target_frames_rounded = round(target_frames_ideal)
398
+ if target_frames_rounded < 1: target_frames_rounded = 1
 
399
 
400
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
401
  actual_num_frames = int(n_val * 8 + 1)
 
402
  actual_num_frames = max(9, actual_num_frames)
403
+ num_frames = min(MAX_NUM_FRAMES, actual_num_frames) # This num_frames is used by the pipe
404
 
405
  if mode == "video-to-video" and (video is not None):
406
+ loaded_video_frames = load_video(video)[:frames_to_use]
407
+ condition_input_video = True
408
+ width, height = loaded_video_frames[0].size
409
+ # steps = 4 # This was hardcoded, let user control steps
410
+ elif mode == "image-to-video" and (image is not None):
411
+ loaded_video_frames = [load_image(image)]
412
+ width, height = loaded_video_frames[0].size
413
+ condition_input_video = True
414
+ else: # text-to-video
415
+ condition_input_video=False
416
+ loaded_video_frames = None # No video frames for pure t2v
417
+
418
+ if condition_input_video and loaded_video_frames:
419
+ condition1 = LTXVideoCondition(video=loaded_video_frames, frame_index=0)
420
  else:
421
  condition1 = None
422
 
423
+ expected_height, expected_width = height, width
 
 
424
  downscale_factor = 2 / 3
425
  downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
426
  downscaled_height, downscaled_width = round_to_nearest_resolution_acceptable_by_vae(downscaled_height, downscaled_width)
427
+
428
  timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725]
429
  timesteps_second_pass = [1000, 909, 725, 421]
430
+ if steps == 8:
431
  timesteps_first_pass = [1000, 993, 987, 981, 975, 909, 725, 0.03]
432
  timesteps_second_pass = [1000, 909, 725, 421, 0]
433
+ elif 7 < steps < 8: # Non-integer steps could be an issue for these pre-defined timesteps
434
+ timesteps_first_pass = None
435
  timesteps_second_pass = None
436
 
437
+ with calculateDuration("Main pipe generation"):
438
+ latents = pipe(
 
439
  conditions=condition1,
440
+ prompt=effective_prompt, # Use prompt with trigger word
441
  negative_prompt=negative_prompt,
442
+ width=downscaled_width,
443
+ height=downscaled_height,
444
  num_frames=num_frames,
445
+ num_inference_steps=steps,
 
446
  decode_timestep=0.05,
447
  decode_noise_scale=0.025,
448
+ timesteps=timesteps_first_pass,
449
  image_cond_noise_scale=0.0,
450
  guidance_rescale=0.7,
451
+ guidance_scale=guidance_scale,
452
+ generator=torch.Generator(device=device).manual_seed(seed),
453
+ output_type="latent",
454
+ ).frames
455
+
456
+ final_video_frames_np = None # Initialize
457
+ if improve_texture:
458
+ upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2 # These are internal, not user-facing W/H
459
+ with calculateDuration("Latent upscaling"):
460
+ upscaled_latents = pipe_upsample(
461
+ latents=latents,
462
+ adain_factor=1.0,
463
+ output_type="latent"
464
+ ).frames
465
+
466
+ with calculateDuration("Denoising upscaled video"):
467
+ final_video_frames_np = pipe( # Using main pipe for denoising
468
+ conditions=condition1, # Re-pass condition if applicable
469
+ prompt=effective_prompt,
470
+ negative_prompt=negative_prompt,
471
+ width=upscaled_width, # Use upscaled dimensions for this pass
472
+ height=upscaled_height,
473
+ num_frames=num_frames,
474
+ guidance_scale=guidance_scale,
475
+ denoise_strength=0.999,
476
+ timesteps=timesteps_second_pass,
477
+ num_inference_steps=10, # Or make this configurable
478
+ latents=upscaled_latents,
479
+ decode_timestep=0.05,
480
+ decode_noise_scale=0.025,
481
+ image_cond_noise_scale=0.0,
482
+ guidance_rescale=0.7,
483
+ generator=torch.Generator(device=device).manual_seed(seed),
484
+ output_type="np",
485
+ ).frames[0]
486
+ else: # No texture improvement, just upscale latents and decode
487
+ with calculateDuration("Latent upscaling and decoding (no improve_texture)"):
488
+ final_video_frames_np = pipe_upsample(
489
+ latents=latents,
490
+ output_type="np" # Decode directly
491
+ ).frames[0]
492
 
493
+ # Video saving
494
+ video_uint8_frames = [(frame * 255).astype(np.uint8) for frame in final_video_frames_np]
495
+ output_filename = "output.mp4"
496
+ with calculateDuration("Saving video to mp4"):
497
+ with imageio.get_writer(output_filename, fps=FPS, quality=8, macro_block_size=1) as writer: # Removed bitrate=None
498
+ for frame_idx, frame_data in enumerate(video_uint8_frames):
499
+ progress((frame_idx + 1) / len(video_uint8_frames), desc="Encoding video frames...")
500
+ writer.append_data(frame_data)
 
 
 
501
 
502
+ return output_filename, seed # Return seed for display
 
 
503
 
504
+ # --- Gradio UI ---
505
  css="""
506
+ #col-container { margin: 0 auto; max-width: 1000px; } /* Increased max-width for gallery */
507
+ #gallery .grid-wrap{height: 20vh !important; max-height: 250px !important;} /* From app (2), adjusted height */
508
+ .custom_lora_card { border: 1px solid #e0e0e0; border-radius: 8px; padding: 10px; margin-top: 10px; background-color: #f9f9f9; }
509
+ .card_internal { display: flex; align-items: center; }
510
+ .card_internal img { margin-right: 1em; border-radius: 4px; }
511
+ .card_internal div h4 { margin-bottom: 0.2em; }
512
+ .card_internal div small { font-size: 0.9em; color: #555; }
513
+ #lora_list_link { font-size: 90%; background: var(--block-background-fill); padding: 0.5em 1em; border-radius: 8px; display:inline-block; margin-top:10px;}
 
514
  """
515
 
516
  with gr.Blocks(css=css, theme=gr.themes.Ocean()) as demo:
517
+ gr.Markdown("# LTX Video 0.9.7 Distilled with LoRA Explorer")
518
+ gr.Markdown("Fast high quality video generation with custom LoRA support. [Model](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltxv-13b-0.9.7-distilled.safetensors) [GitHub](https://github.com/Lightricks/LTX-Video)")
 
519
 
520
+ selected_lora_index_state = gr.State(None)
521
+
522
+ with gr.Row():
523
+ with gr.Column(scale=2): # Main controls
524
  with gr.Tab("image-to-video") as image_tab:
525
  with gr.Group():
526
  video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
527
+ image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "clipboard"]) # Removed webcam
528
  i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
529
  i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
530
  with gr.Tab("text-to-video") as text_tab:
 
537
  with gr.Group():
538
  image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
539
  video_v2v = gr.Video(label="Input Video")
540
+ frames_to_use_slider = gr.Slider(label="Frames to use from input video", minimum=9, maximum=MAX_NUM_FRAMES, value=9, step=8, info="Number of initial frames for conditioning. Must be N*8+1.")
541
  v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
542
  v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
543
 
544
+ duration_slider = gr.Slider(
545
+ label="Video Duration (seconds)", minimum=0.3, maximum=8.5, value=2, step=0.1,
546
+ info="Target video duration (0.3s to 8.5s). Actual frames depend on model constraints (multiple of 8 + 1)."
 
547
  )
548
+ improve_texture_checkbox = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, info="Uses a two-pass generation for better quality, but is slower.")
549
+
550
+ with gr.Column(scale=1): # LoRA Gallery and Output
551
+ selected_lora_info_markdown = gr.Markdown("No LoRA selected.")
552
+ lora_gallery_display = gr.Gallery(
553
+ # Ensure loras is a list of (image_url, title) tuples or similar
554
+ value=[(item.get("image", "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"), item["title"]) for item in loras] if loras else [],
555
+ label="LoRA Gallery",
556
+ allow_preview=True, preview=True,
557
+ columns=2, height="auto", object_fit="contain", # Adjusted for better display
558
+ elem_id="gallery"
559
+ )
560
+ with gr.Group():
561
+ custom_lora_input_path = gr.Textbox(label="Add Custom LoRA from Hugging Face", info="Path like 'username/repo-name'", placeholder="e.g., multimodalart/flux-lora-example (but for LTX!)")
562
+ gr.Markdown("[Find LTX-compatible LoRAs on Hugging Face](https://huggingface.co/models?other=base_model:Lightricks/LTX-Video-0.9.7-distilled&sort=trending)", elem_id="lora_list_link")
563
+
564
+ custom_lora_status_html = gr.HTML(visible=False) # For displaying custom LoRA card
565
+ remove_custom_lora_button = gr.Button("Remove Last Added Custom LoRA", visible=False)
566
+
567
+ output_video = gr.Video(label="Generated Video", interactive=False)
568
+ # output_seed_info = gr.Textbox(label="Seed Used", interactive=False) # Add this to show seed
569
  gr.DeepLinkButton()
 
570
 
571
+ with gr.Accordion("Advanced settings", open=False):
572
+ mode_dropdown = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="Task Mode", value="image-to-video", visible=False) # Keep internal
573
+ negative_prompt_textbox = gr.Textbox(label="Negative Prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
574
+ with gr.Row():
575
+ seed_number_input = gr.Number(label="Seed", value=0, precision=0)
576
+ randomize_seed_checkbox = gr.Checkbox(label="Randomize Seed", value=True)
577
+ with gr.Row():
578
+ guidance_scale_slider = gr.Slider(label="Guidance Scale (CFG)", minimum=0, maximum=10, value=1.0, step=0.1) # LTX uses low CFG
579
+ steps_slider = gr.Slider(label="Inference Steps (Main Pass)", minimum=1, maximum=30, value=7, step=1) # Default steps for LTX
580
+ # num_frames_slider = gr.Slider(label="# Frames (Debug - Overridden by Duration)", minimum=9, maximum=MAX_NUM_FRAMES, value=96, step=8, visible=False) # Hidden, as duration controls it
581
+ with gr.Row():
582
+ height_slider = gr.Slider(label="Target Height", value=512, step=pipe.vae_spatial_compression_ratio, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info=f"Must be divisible by {pipe.vae_spatial_compression_ratio}.")
583
+ width_slider = gr.Slider(label="Target Width", value=704, step=pipe.vae_spatial_compression_ratio, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info=f"Must be divisible by {pipe.vae_spatial_compression_ratio}.")
584
+ with gr.Row():
585
+ lora_scale_slider = gr.Slider(label="LoRA Scale", minimum=0.0, maximum=2.0, step=0.05, value=0.8, info="Adjusts the influence of the selected LoRA.")
586
+
587
+
588
+ # --- Event Handlers ---
589
+ image_i2v.upload(fn=handle_image_upload_for_dims, inputs=[image_i2v, height_slider, width_slider], outputs=[height_slider, width_slider])
590
+ video_v2v.upload(fn=handle_video_upload_for_dims, inputs=[video_v2v, height_slider, width_slider], outputs=[height_slider, width_slider])
591
+ video_v2v.clear(lambda cur_h, cur_w: (gr.update(value=cur_h), gr.update(value=cur_w)), inputs=[height_slider, width_slider], outputs=[height_slider, width_slider])
592
+ image_i2v.clear(lambda cur_h, cur_w: (gr.update(value=cur_h), gr.update(value=cur_w)), inputs=[height_slider, width_slider], outputs=[height_slider, width_slider])
593
+
594
+
595
+ image_tab.select(fn=update_task_image, outputs=[mode_dropdown])
596
+ text_tab.select(fn=update_task_text, outputs=[mode_dropdown])
597
+ video_tab.select(fn=update_task_video, outputs=[mode_dropdown])
598
+
599
+ # LoRA Gallery Callbacks
600
+ lora_gallery_display.select(
601
+ update_lora_selection,
602
+ outputs=[selected_lora_info_markdown, selected_lora_index_state]
603
  )
604
+ custom_lora_input_path.submit(
605
+ add_custom_lora_for_ltx,
606
+ inputs=[custom_lora_input_path],
607
+ outputs=[custom_lora_status_html, remove_custom_lora_button, lora_gallery_display, selected_lora_info_markdown, selected_lora_index_state, custom_lora_input_path]
608
  )
609
+ remove_custom_lora_button.click(
610
+ remove_custom_lora_for_ltx,
611
+ outputs=[custom_lora_status_html, remove_custom_lora_button, lora_gallery_display, selected_lora_info_markdown, selected_lora_index_state, custom_lora_input_path]
612
  )
613
 
614
+ # Consolidate inputs for generate function
615
+ gen_inputs = [
616
+ # prompt, negative_prompt, image and video are passed positionally by each button,
+ # in the same order as generate()'s signature
618
+ height_slider, width_slider, mode_dropdown, steps_slider,
619
+ gr.Number(value=96, visible=False), # placeholder for num_frames_slider_val, as it's controlled by duration
620
+ frames_to_use_slider,
621
+ seed_number_input, randomize_seed_checkbox, guidance_scale_slider, duration_slider, improve_texture_checkbox,
622
+ selected_lora_index_state, lora_scale_slider
623
+ ]
624
+
625
+ t2v_button.click(fn=generate,
626
+ inputs=[t2v_prompt, negative_prompt_textbox, image_n_hidden, video_n_hidden] + gen_inputs,
627
+ outputs=[output_video, seed_number_input]) # Added seed_number_input to outputs
628
+
629
+ i2v_button.click(fn=generate,
630
+ inputs=[i2v_prompt, negative_prompt_textbox, image_i2v, video_i_hidden] + gen_inputs,
631
+ outputs=[output_video, seed_number_input])
632
+
633
+ v2v_button.click(fn=generate,
634
+ inputs=[v2v_prompt, negative_prompt_textbox, image_v_hidden, video_v2v] + gen_inputs,
635
+ outputs=[output_video, seed_number_input])
636
+
637
+ demo.queue(max_size=10).launch()
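For quick experimentation outside the Space, the LoRA handling added to generate() reduces to the diffusers adapter calls below. A minimal sketch, assuming a CUDA device and a hypothetical LTX-compatible LoRA repo user/ltx-lora containing lora.safetensors:

import torch
from diffusers import LTXConditionPipeline

pipe = LTXConditionPipeline.from_pretrained(
    "Lightricks/LTX-Video-0.9.7-distilled", torch_dtype=torch.bfloat16
).to("cuda")

# Drop whatever adapter is currently attached (the app wraps this in try/except
# because there may be nothing to unload), then load the selected LoRA under a
# fixed adapter name and weight it with the "LoRA Scale" slider value.
try:
    pipe.unload_lora_weights()
except Exception:
    pass
pipe.load_lora_weights("user/ltx-lora", weight_name="lora.safetensors", adapter_name="active_lora")
pipe.set_adapters(["active_lora"], adapter_weights=[0.8])

This mirrors the app's flow: one adapter at a time under the name "active_lora", rescaled per request from the advanced-settings slider.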