markury committed on
Commit 43084dd · 1 Parent(s): dabb250
Files changed (1)
  1. app.py +15 -214
app.py CHANGED
@@ -5,9 +5,6 @@ from diffusers.utils import export_to_video
  from diffusers import AutoencoderKLWan, WanPipeline
  from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
  from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
- import os
- import tempfile
- from typing import List, Union, Optional

  # Define model options
  MODEL_OPTIONS = {
@@ -36,15 +33,8 @@ def generate_video(
  num_frames,
  guidance_scale,
  num_inference_steps,
- output_fps,
- # Second pass parameters
- enable_second_pass,
- second_pass_scale,
- second_pass_denoise,
- second_pass_flow_shift,
- second_pass_cfg,
- show_both_outputs
- ) -> tuple:
+ output_fps
+ ):
  # Get model ID from selection
  model_id = MODEL_OPTIONS[model_choice]
 
@@ -86,130 +76,25 @@ def generate_video(
  # Enable CPU offload for low VRAM
  pipe.enable_model_cpu_offload()

- # Keep track of output files for return
- output_files = []
-
- # First pass - generate base video
- print("Running first pass...")
- first_pass = pipe(
+ # Generate video
+ output = pipe(
  prompt=prompt,
  negative_prompt=negative_prompt,
  height=height,
  width=width,
  num_frames=num_frames,
  guidance_scale=guidance_scale,
- num_inference_steps=num_inference_steps,
- # For Wan, we may need to approach this differently for the latents
- output_type="pt", # Always get PyTorch tensors for the first pass
- return_dict=True
- )
-
- # Get the frames or latents from the first pass output
- first_pass_frames = first_pass.frames[0]
-
- # Output the first pass video if needed
- if not enable_second_pass or (enable_second_pass and show_both_outputs):
- # Export first pass to video
- first_pass_file = "output_first_pass.mp4"
- export_to_video(first_pass_frames, first_pass_file, fps=output_fps)
- output_files.append(first_pass_file)
+ num_inference_steps=num_inference_steps
+ ).frames[0]

- # Second pass - upscale and refine if enabled
- second_pass_file = None
- if enable_second_pass:
- print("Running second pass with scale factor:", second_pass_scale)
-
- # For second pass, we need to first encode the frames to get latents
- print("Encoding first pass frames to latents...")
- with torch.no_grad():
- # Move frames to the same device as the VAE
- first_pass_frames = first_pass_frames.to(pipe.vae.device)
- # Encode to get latents
- latents = pipe.vae.encode(first_pass_frames).latent_dist.sample()
-
- # Resize latents for second pass (upscale)
- new_height = int(height * second_pass_scale)
- new_width = int(width * second_pass_scale)
-
- # Ensure dimensions are multiples of 8
- new_height = (new_height // 8) * 8
- new_width = (new_width // 8) * 8
-
- print(f"Upscaling latents from {height}x{width} to {new_height}x{new_width}")
-
- # Get latent dimensions
- latent_height = latents.shape[2] # Should be height//8
- latent_width = latents.shape[3] # Should be width//8
-
- # Calculate new latent dimensions
- new_latent_height = new_height // 8
- new_latent_width = new_width // 8
-
- # Upscale latents using interpolate
- upscaled_latents = torch.nn.functional.interpolate(
- latents,
- size=(num_frames, new_latent_height, new_latent_width),
- mode="trilinear",
- align_corners=False
- )
-
- # Update scheduler for second pass if using different flow shift
- if scheduler_type == "UniPCMultistepScheduler":
- pipe.scheduler = UniPCMultistepScheduler.from_config(
- pipe.scheduler.config,
- flow_shift=second_pass_flow_shift
- )
- else:
- pipe.scheduler = FlowMatchEulerDiscreteScheduler(shift=second_pass_flow_shift)
-
- # Calculate noise level for partial denoising
- # For noise scheduler, 0 means no noise (final step) and 1 means full noise (first step)
- # So we convert our denoise strength to a timestep in the schedule
- start_step = int(second_pass_denoise * num_inference_steps)
-
- # Run second pass with the upscaled latents and partial denoising
- print(f"Denoising from step {start_step} of {num_inference_steps} (denoise strength: {second_pass_denoise})")
-
- # Use the second pass CFG value
- second_pass_guidance = second_pass_cfg if second_pass_cfg > 0 else guidance_scale
-
- second_pass = pipe(
- prompt=prompt,
- negative_prompt=negative_prompt,
- height=new_height,
- width=new_width,
- num_frames=num_frames,
- guidance_scale=second_pass_guidance,
- num_inference_steps=num_inference_steps,
- latents=upscaled_latents, # Use the upscaled latents
- strength=second_pass_denoise, # Partial denoising
- output_type="pt",
- return_dict=True
- )
-
- # Export second pass to video
- second_pass_file = "output_second_pass.mp4"
- export_to_video(second_pass.frames[0], second_pass_file, fps=output_fps)
- output_files.append(second_pass_file)
+ # Export to video
+ temp_file = "output.mp4"
+ export_to_video(output, temp_file, fps=output_fps)

- # Return the appropriate video output(s)
- if enable_second_pass and show_both_outputs and len(output_files) > 1:
- return output_files[0], output_files[1] # Return both first and second pass
- elif len(output_files) > 0:
- if enable_second_pass:
- # Return only second pass (and None for first output if showing both)
- return None if show_both_outputs else output_files[0], output_files[0]
- else:
- # Return first pass only
- return output_files[0], None
- else:
- return None, None
+ return temp_file

- # Create the Gradio interface
+ # Create the Gradio interface
  with gr.Blocks() as demo:
- # Import gr.update for visibility control
- from gradio import update
-
  gr.HTML("""
  <p align="center">
  <svg version="1.1" viewBox="0 0 1200 295" xmlns="http://www.w3.org/2000/svg" xmlns:v="https://vecta.io/nano" width="400">
@@ -219,7 +104,7 @@ with gr.Blocks() as demo:
  💻 <a href="https://www.markury.dev/"><b>Website</b></a> &nbsp&nbsp | &nbsp&nbsp 🤗 <a href="https://huggingface.co/markury"><b>Hugging Face</b></a> &nbsp&nbsp | &nbsp&nbsp 💿 <a href="https://thebulge.xyz"><b>Discord</b></a>
  </p>
  """)
- gr.Markdown("# Wan 2.1 T2V 1.3B with LoRA and Second Pass Refinement")
+ gr.Markdown("# Wan 2.1 T2V 1.3B with LoRA")

  with gr.Row():
  with gr.Column(scale=1):
@@ -324,82 +209,11 @@ with gr.Blocks() as demo:
  step=1
  )

- # Add Second Pass options
- with gr.Accordion("Second Pass Refinement (HiresFix)", open=False):
- enable_second_pass = gr.Checkbox(
- label="Enable Second Pass Refinement",
- value=False,
- info="Scale up and refine the video with a second denoising pass"
- )
-
- with gr.Row():
- second_pass_scale = gr.Slider(
- label="Scale Factor",
- minimum=1.0,
- maximum=2.0,
- value=1.25,
- step=0.05,
- info="How much to upscale the video for refinement"
- )
- second_pass_denoise = gr.Slider(
- label="Denoise Strength",
- minimum=0.1,
- maximum=1.0,
- value=0.6,
- step=0.05,
- info="Lower values preserve more of the original details"
- )
-
- with gr.Row():
- second_pass_flow_shift = gr.Slider(
- label="Second Pass Flow Shift",
- minimum=1.0,
- maximum=12.0,
- value=3.0,
- step=0.5,
- info="Flow shift value for the second pass (optional)"
- )
- second_pass_cfg = gr.Slider(
- label="Second Pass CFG",
- minimum=0.0,
- maximum=15.0,
- value=0.0,
- step=0.5,
- info="Set to 0 to use the same value as first pass"
- )
-
- show_both_outputs = gr.Checkbox(
- label="Show Both Outputs",
- value=False,
- info="Display both original and refined videos"
- )
-
  generate_btn = gr.Button("Generate Video")

  with gr.Column(scale=1):
- # Updated output to handle multiple videos if both outputs are selected
- with gr.Group():
- output_video = gr.Video(label="Generated Video")
- second_output_video = gr.Video(label="Second Pass Video", visible=False)
-
- # Control visibility through the UI changes directly
- def toggle_second_video(enable_pass, show_both):
- return gr.update(visible=enable_pass and show_both)
-
- # Update visibility when checkboxes change
- enable_second_pass.change(
- fn=toggle_second_video,
- inputs=[enable_second_pass, show_both_outputs],
- outputs=[second_output_video]
- )
-
- show_both_outputs.change(
- fn=toggle_second_video,
- inputs=[enable_second_pass, show_both_outputs],
- outputs=[second_output_video]
- )
+ output_video = gr.Video(label="Generated Video")

- # Fixed output handling for Gradio
  generate_btn.click(
  fn=generate_video,
  inputs=[
@@ -416,16 +230,9 @@ with gr.Blocks() as demo:
  num_frames,
  guidance_scale,
  num_inference_steps,
- output_fps,
- # Second pass parameters
- enable_second_pass,
- second_pass_scale,
- second_pass_denoise,
- second_pass_flow_shift,
- second_pass_cfg,
- show_both_outputs
+ output_fps
  ],
- outputs=[output_video, second_output_video]
+ outputs=output_video
  )

  gr.Markdown("""
@@ -435,12 +242,6 @@ with gr.Blocks() as demo:
  - Number of frames should be of the form 4k+1 (e.g., 33, 81)
  - Stick to lower frame counts. Even at 480p, an 81 frame sequence at 30 steps will nearly time out the request in this space.

- ## Second Pass Refinement Tips:
- - The second pass (similar to HiresFix) can enhance details by upscaling and refining the video
- - Start with a scale factor around 1.25 and denoise strength of 0.6
- - Lower denoise values preserve more of the original video structure
- - The second pass will increase generation time substantially - use with caution!
-
  ## Using LoRAs with multiple safetensors files:
  If you encounter an error stating "more than one weights file", you need to specify the exact weight file name in the "LoRA Weight Name" field.
  You can find this by browsing the repository on Hugging Face and looking for the safetensors files (common names include: adapter_model.safetensors, pytorch_lora_weights.safetensors).
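For reference, the "LoRA Weight Name" field maps to the `weight_name` argument of diffusers' `load_lora_weights`. Below is a minimal sketch (not part of this commit) of the equivalent call in plain diffusers; the base checkpoint is assumed to be the Wan 2.1 T2V 1.3B Diffusers repo, and the LoRA repo id, weight file name, and prompt are placeholders.

```python
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video

# Assumed base checkpoint (Wan 2.1 T2V 1.3B in Diffusers format).
model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)

# If the LoRA repo contains more than one safetensors file, name the one to load
# explicitly; this is what the "LoRA Weight Name" field feeds.
pipe.load_lora_weights(
    "your-username/your-wan-lora",                   # placeholder repo id
    weight_name="pytorch_lora_weights.safetensors",  # placeholder weight file
)

pipe.enable_model_cpu_offload()  # keeps VRAM usage low, as in the Space

frames = pipe(
    prompt="a corgi running on the beach",  # placeholder prompt
    num_frames=33,                          # frame counts of the form 4k+1
    guidance_scale=5.0,
    num_inference_steps=30,
).frames[0]
export_to_video(frames, "output.mp4", fps=16)
```

Passing `weight_name` explicitly is what resolves the "more than one weights file" error, since the loader otherwise cannot decide between several safetensors files in the same repository.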