Wan-2.1-T2V-1.3B-GPU

Paused

App Files Files Community

markury committed on Mar 19

Commit

43084dd

1 Parent(s): dabb250

revert: ca11d336924e0d911c44244f412d335601a6d869

Browse files

Files changed (1) hide show

app.py +15 -214

app.py CHANGED Viewed

@@ -5,9 +5,6 @@ from diffusers.utils import export_to_video
 from diffusers import AutoencoderKLWan, WanPipeline
 from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
 from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
-import os
-import tempfile
-from typing import List, Union, Optional
 # Define model options
 MODEL_OPTIONS = {
@@ -36,15 +33,8 @@ def generate_video(
     num_frames,
     guidance_scale,
     num_inference_steps,
-    output_fps,
-    # Second pass parameters
-    enable_second_pass,
-    second_pass_scale,
-    second_pass_denoise,
-    second_pass_flow_shift,
-    second_pass_cfg,
-    show_both_outputs
-) -> tuple:
     # Get model ID from selection
     model_id = MODEL_OPTIONS[model_choice]
@@ -86,130 +76,25 @@ def generate_video(
     # Enable CPU offload for low VRAM
     pipe.enable_model_cpu_offload()
-    # Keep track of output files for return
-    output_files = []
-    # First pass - generate base video
-    print("Running first pass...")
-    first_pass = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
         height=height,
         width=width,
         num_frames=num_frames,
         guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        # For Wan, we may need to approach this differently for the latents
-        output_type="pt",  # Always get PyTorch tensors for the first pass
-        return_dict=True
-    )
-    # Get the frames or latents from the first pass output
-    first_pass_frames = first_pass.frames[0]
-    # Output the first pass video if needed
-    if not enable_second_pass or (enable_second_pass and show_both_outputs):
-        # Export first pass to video
-        first_pass_file = "output_first_pass.mp4"
-        export_to_video(first_pass_frames, first_pass_file, fps=output_fps)
-        output_files.append(first_pass_file)
-    # Second pass - upscale and refine if enabled
-    second_pass_file = None
-    if enable_second_pass:
-        print("Running second pass with scale factor:", second_pass_scale)
-        # For second pass, we need to first encode the frames to get latents
-        print("Encoding first pass frames to latents...")
-        with torch.no_grad():
-            # Move frames to the same device as the VAE
-            first_pass_frames = first_pass_frames.to(pipe.vae.device)
-            # Encode to get latents
-            latents = pipe.vae.encode(first_pass_frames).latent_dist.sample()
-        # Resize latents for second pass (upscale)
-        new_height = int(height * second_pass_scale)
-        new_width = int(width * second_pass_scale)
-        # Ensure dimensions are multiples of 8
-        new_height = (new_height // 8) * 8
-        new_width = (new_width // 8) * 8
-        print(f"Upscaling latents from {height}x{width} to {new_height}x{new_width}")
-        # Get latent dimensions
-        latent_height = latents.shape[2]  # Should be height//8
-        latent_width = latents.shape[3]   # Should be width//8
-        # Calculate new latent dimensions
-        new_latent_height = new_height // 8
-        new_latent_width = new_width // 8
-        # Upscale latents using interpolate
-        upscaled_latents = torch.nn.functional.interpolate(
-            latents,
-            size=(num_frames, new_latent_height, new_latent_width),
-            mode="trilinear",
-            align_corners=False
-        )
-        # Update scheduler for second pass if using different flow shift
-        if scheduler_type == "UniPCMultistepScheduler":
-            pipe.scheduler = UniPCMultistepScheduler.from_config(
-                pipe.scheduler.config,
-                flow_shift=second_pass_flow_shift
-            )
-        else:
-            pipe.scheduler = FlowMatchEulerDiscreteScheduler(shift=second_pass_flow_shift)
-        # Calculate noise level for partial denoising
-        # For noise scheduler, 0 means no noise (final step) and 1 means full noise (first step)
-        # So we convert our denoise strength to a timestep in the schedule
-        start_step = int(second_pass_denoise * num_inference_steps)
-        # Run second pass with the upscaled latents and partial denoising
-        print(f"Denoising from step {start_step} of {num_inference_steps} (denoise strength: {second_pass_denoise})")
-        # Use the second pass CFG value
-        second_pass_guidance = second_pass_cfg if second_pass_cfg > 0 else guidance_scale
-        second_pass = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=new_height,
-            width=new_width,
-            num_frames=num_frames,
-            guidance_scale=second_pass_guidance,
-            num_inference_steps=num_inference_steps,
-            latents=upscaled_latents,  # Use the upscaled latents
-            strength=second_pass_denoise,  # Partial denoising
-            output_type="pt",
-            return_dict=True
-        )
-        # Export second pass to video
-        second_pass_file = "output_second_pass.mp4"
-        export_to_video(second_pass.frames[0], second_pass_file, fps=output_fps)
-        output_files.append(second_pass_file)
-    # Return the appropriate video output(s)
-    if enable_second_pass and show_both_outputs and len(output_files) > 1:
-        return output_files[0], output_files[1]  # Return both first and second pass
-    elif len(output_files) > 0:
-        if enable_second_pass:
-            # Return only second pass (and None for first output if showing both)
-            return None if show_both_outputs else output_files[0], output_files[0]
-        else:
-            # Return first pass only
-            return output_files[0], None
-    else:
-        return None, None
-    # Create the Gradio interface
 with gr.Blocks() as demo:
-    # Import gr.update for visibility control
-    from gradio import update
     gr.HTML("""
     <p align="center">
     <svg version="1.1" viewBox="0 0 1200 295" xmlns="http://www.w3.org/2000/svg" xmlns:v="https://vecta.io/nano" width="400">
@@ -219,7 +104,7 @@ with gr.Blocks() as demo:
     💻 <a href="https://www.markury.dev/"><b>Website</b></a> &nbsp&nbsp | &nbsp&nbsp 🤗 <a href="https://huggingface.co/markury"><b>Hugging Face</b></a> &nbsp&nbsp | &nbsp&nbsp 💿 <a href="https://thebulge.xyz"><b>Discord</b></a>
     </p>
     """)
-    gr.Markdown("# Wan 2.1 T2V 1.3B with LoRA and Second Pass Refinement")
     with gr.Row():
         with gr.Column(scale=1):
@@ -324,82 +209,11 @@ with gr.Blocks() as demo:
                     step=1
                 )
-            # Add Second Pass options
-            with gr.Accordion("Second Pass Refinement (HiresFix)", open=False):
-                enable_second_pass = gr.Checkbox(
-                    label="Enable Second Pass Refinement",
-                    value=False,
-                    info="Scale up and refine the video with a second denoising pass"
-                )
-                with gr.Row():
-                    second_pass_scale = gr.Slider(
-                        label="Scale Factor",
-                        minimum=1.0,
-                        maximum=2.0,
-                        value=1.25,
-                        step=0.05,
-                        info="How much to upscale the video for refinement"
-                    )
-                    second_pass_denoise = gr.Slider(
-                        label="Denoise Strength",
-                        minimum=0.1,
-                        maximum=1.0,
-                        value=0.6,
-                        step=0.05,
-                        info="Lower values preserve more of the original details"
-                    )
-                with gr.Row():
-                    second_pass_flow_shift = gr.Slider(
-                        label="Second Pass Flow Shift",
-                        minimum=1.0,
-                        maximum=12.0,
-                        value=3.0,
-                        step=0.5,
-                        info="Flow shift value for the second pass (optional)"
-                    )
-                    second_pass_cfg = gr.Slider(
-                        label="Second Pass CFG",
-                        minimum=0.0,
-                        maximum=15.0,
-                        value=0.0,
-                        step=0.5,
-                        info="Set to 0 to use the same value as first pass"
-                    )
-                show_both_outputs = gr.Checkbox(
-                    label="Show Both Outputs",
-                    value=False,
-                    info="Display both original and refined videos"
-                )
             generate_btn = gr.Button("Generate Video")
         with gr.Column(scale=1):
-            # Updated output to handle multiple videos if both outputs are selected
-            with gr.Group():
-                output_video = gr.Video(label="Generated Video")
-                second_output_video = gr.Video(label="Second Pass Video", visible=False)
-                # Control visibility through the UI changes directly
-                def toggle_second_video(enable_pass, show_both):
-                    return gr.update(visible=enable_pass and show_both)
-                # Update visibility when checkboxes change
-                enable_second_pass.change(
-                    fn=toggle_second_video,
-                    inputs=[enable_second_pass, show_both_outputs],
-                    outputs=[second_output_video]
-                )
-                show_both_outputs.change(
-                    fn=toggle_second_video,
-                    inputs=[enable_second_pass, show_both_outputs],
-                    outputs=[second_output_video]
-                )
-    # Fixed output handling for Gradio
     generate_btn.click(
         fn=generate_video,
         inputs=[
@@ -416,16 +230,9 @@ with gr.Blocks() as demo:
             num_frames,
             guidance_scale,
             num_inference_steps,
-            output_fps,
-            # Second pass parameters
-            enable_second_pass,
-            second_pass_scale,
-            second_pass_denoise,
-            second_pass_flow_shift,
-            second_pass_cfg,
-            show_both_outputs
         ],
-        outputs=[output_video, second_output_video]
     )
     gr.Markdown("""
@@ -435,12 +242,6 @@ with gr.Blocks() as demo:
     - Number of frames should be of the form 4k+1 (e.g., 33, 81)
     - Stick to lower frame counts. Even at 480p, an 81 frame sequence at 30 steps will nearly time out the request in this space.
-    ## Second Pass Refinement Tips:
-    - The second pass (similar to HiresFix) can enhance details by upscaling and refining the video
-    - Start with a scale factor around 1.25 and denoise strength of 0.6
-    - Lower denoise values preserve more of the original video structure
-    - The second pass will increase generation time substantially - use with caution!
     ## Using LoRAs with multiple safetensors files:
     If you encounter an error stating "more than one weights file", you need to specify the exact weight file name in the "LoRA Weight Name" field.
     You can find this by browsing the repository on Hugging Face and looking for the safetensors files (common names include: adapter_model.safetensors, pytorch_lora_weights.safetensors).

 from diffusers import AutoencoderKLWan, WanPipeline
 from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
 from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
 # Define model options
 MODEL_OPTIONS = {
     num_frames,
     guidance_scale,
     num_inference_steps,
+    output_fps
+):
     # Get model ID from selection
     model_id = MODEL_OPTIONS[model_choice]
     # Enable CPU offload for low VRAM
     pipe.enable_model_cpu_offload()
+    # Generate video
+    output = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
         height=height,
         width=width,
         num_frames=num_frames,
         guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps
+    ).frames[0]
+    # Export to video
+    temp_file = "output.mp4"
+    export_to_video(output, temp_file, fps=output_fps)
+    return temp_file
+# Create the Gradio interface
 with gr.Blocks() as demo:
     gr.HTML("""
     <p align="center">
     <svg version="1.1" viewBox="0 0 1200 295" xmlns="http://www.w3.org/2000/svg" xmlns:v="https://vecta.io/nano" width="400">
     💻 <a href="https://www.markury.dev/"><b>Website</b></a> &nbsp&nbsp | &nbsp&nbsp 🤗 <a href="https://huggingface.co/markury"><b>Hugging Face</b></a> &nbsp&nbsp | &nbsp&nbsp 💿 <a href="https://thebulge.xyz"><b>Discord</b></a>
     </p>
     """)
+    gr.Markdown("# Wan 2.1 T2V 1.3B with LoRA")
     with gr.Row():
         with gr.Column(scale=1):
                     step=1
                 )
             generate_btn = gr.Button("Generate Video")
         with gr.Column(scale=1):
+            output_video = gr.Video(label="Generated Video")
     generate_btn.click(
         fn=generate_video,
         inputs=[
             num_frames,
             guidance_scale,
             num_inference_steps,
+            output_fps
         ],
+        outputs=output_video
     )
     gr.Markdown("""
     - Number of frames should be of the form 4k+1 (e.g., 33, 81)
     - Stick to lower frame counts. Even at 480p, an 81 frame sequence at 30 steps will nearly time out the request in this space.
     ## Using LoRAs with multiple safetensors files:
     If you encounter an error stating "more than one weights file", you need to specify the exact weight file name in the "LoRA Weight Name" field.
     You can find this by browsing the repository on Hugging Face and looking for the safetensors files (common names include: adapter_model.safetensors, pytorch_lora_weights.safetensors).