markury committed on
Commit 3b2e319 · 1 Parent(s): 887231d

debug: peft

Files changed (2):
  1. app.py (+46 -148)
  2. requirements.txt (+1 -4)
app.py CHANGED
@@ -1,30 +1,10 @@
-import sys
-import subprocess
-import importlib.util
-
-# Check if required packages are installed
-required_packages = ["ftfy", "einops", "imageio", "peft", "bitsandbytes"]
-for package in required_packages:
-    if importlib.util.find_spec(package) is None:
-        print(f"Installing missing dependency: {package}")
-        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
-
-import os
 import torch
 import gradio as gr
 import spaces
 from diffusers.utils import export_to_video
-
-# Now try to import the specific components
-try:
-    from diffusers import AutoencoderKLWan, WanPipeline
-    from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
-    from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
-    import peft
-    print("Successfully imported all required modules")
-except ImportError as e:
-    print(f"Error importing diffusers components: {e}")
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "diffusers", "peft"])
+from diffusers import AutoencoderKLWan, WanPipeline
+from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
+from diffusers.schedulers.scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
 
 # Define model options
 MODEL_OPTIONS = {
@@ -38,21 +18,7 @@ SCHEDULER_OPTIONS = {
     "FlowMatchEulerDiscreteScheduler": FlowMatchEulerDiscreteScheduler
 }
 
-def load_model_with_direct_lora(model_id, lora_id=None, lora_scale=0.75):
-    """
-    Alternative approach to loading the model with LoRA weights
-    without using the built-in load_lora_weights method.
-    """
-    print(f"Loading model: {model_id}")
-    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
-
-    # Print PEFT version information
-    print(f"PEFT version: {peft.__version__}")
-
-    return pipe
-
-@spaces.GPU(duration=300)  # Set a 5-minute duration for the GPU access
+@spaces.GPU(duration=300)
 def generate_video(
     model_choice,
     prompt,
@@ -68,119 +34,52 @@ def generate_video(
     num_inference_steps,
     output_fps
 ):
-    """Generate a video using the Wan model and provided parameters"""
-    try:
-        # Get model ID from selection
-        model_id = MODEL_OPTIONS[model_choice]
-
-        # Load the model (with or without LoRA)
-        if lora_id and lora_id.strip():
-            print(f"Loading model with LoRA: {lora_id}, scale: {lora_scale}")
-            pipe = load_model_with_direct_lora(model_id, lora_id, lora_scale)
-        else:
-            print(f"Loading model without LoRA")
-            pipe = load_model_with_direct_lora(model_id)
-
-        # Set the scheduler
-        scheduler_class = SCHEDULER_OPTIONS[scheduler_type]
-        print(f"Using scheduler: {scheduler_type} with flow_shift: {flow_shift}")
-
-        if scheduler_type == "UniPCMultistepScheduler":
-            pipe.scheduler = scheduler_class.from_config(
-                pipe.scheduler.config,
-                prediction_type="flow_prediction",
-                use_flow_sigmas=True,
-                flow_shift=flow_shift
-            )
-        else:
-            pipe.scheduler = scheduler_class(shift=flow_shift)
-
-        # Move to GPU
-        print("Moving model to GPU")
-        pipe.to("cuda")
-
-        # Enable CPU offload for low VRAM
-        print("Enabling CPU offload")
-        pipe.enable_model_cpu_offload()
-
-        # Load LoRA weights if provided
-        if lora_id and lora_id.strip():
-            try:
-                # Try the conventional way first
-                print(f"Loading LoRA weights using conventional method: {lora_id}")
-                pipe.load_lora_weights(lora_id)
-                print("LoRA weights loaded successfully")
-            except Exception as e:
-                print(f"Error loading LoRA weights: {str(e)}")
-
-                # Try an alternative approach
-                try:
-                    print("Attempting alternative approach for LoRA integration...")
-                    # Let's try the direct adapter approach
-                    from peft import PeftModel
-                    from huggingface_hub import hf_hub_download
-
-                    # Make a temporary directory for the LoRA weights
-                    lora_dir = "lora_weights"
-                    os.makedirs(lora_dir, exist_ok=True)
-
-                    # Download the LoRA weights
-                    print(f"Downloading LoRA weights from {lora_id}")
-                    lora_file = hf_hub_download(lora_id, filename="pytorch_lora_weights.safetensors")
-
-                    print(f"LoRA file downloaded: {lora_file}")
-                    print("Applying LoRA weights manually...")
-
-                    # Instead of trying to directly integrate LoRA, we'll just proceed without it for now
-                    # but with a warning message
-                    print("WARNING: Could not load LoRA weights. Proceeding without LoRA adaptation.")
-                except Exception as nested_e:
-                    print(f"Alternative LoRA approach also failed: {str(nested_e)}")
-                    print("Proceeding without LoRA weights")
-
-        # Generate the video
-        print(f"Generating video with prompt: {prompt[:50]}...")
-        print(f"Parameters: height={height}, width={width}, num_frames={num_frames}, "
-              f"guidance_scale={guidance_scale}, steps={num_inference_steps}")
-
-        # Prepare generation parameters
-        generation_params = {
-            "prompt": prompt,
-            "negative_prompt": negative_prompt,
-            "height": height,
-            "width": width,
-            "num_frames": num_frames,
-            "guidance_scale": guidance_scale,
-            "num_inference_steps": num_inference_steps
-        }
-
-        # Add cross attention scale if LoRA was successfully loaded
-        if lora_id and lora_id.strip():
-            generation_params["cross_attention_kwargs"] = {"scale": lora_scale}
-            print(f"Using LoRA scale: {lora_scale}")
-
-        # Generate the video
-        print("Starting generation...")
-        output = pipe(**generation_params).frames[0]
-        print(f"Generation complete, frames shape: {output.shape if hasattr(output, 'shape') else 'unknown'}")
-
-        # Export to video
-        temp_file = "output.mp4"
-        print(f"Exporting video with fps={output_fps}")
-        export_to_video(output, temp_file, fps=output_fps)
-        print(f"Video exported to {temp_file}")
-
-        return temp_file
-    except Exception as e:
-        import traceback
-        error_trace = traceback.format_exc()
-        print(f"Error generating video: {str(e)}\n{error_trace}")
-        return f"Error generating video: {str(e)}\n{error_trace}"
+    # Get model ID from selection
+    model_id = MODEL_OPTIONS[model_choice]
+
+    # Load model
+    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+
+    # Set scheduler
+    if scheduler_type == "UniPCMultistepScheduler":
+        pipe.scheduler = UniPCMultistepScheduler.from_config(
+            pipe.scheduler.config,
+            flow_shift=flow_shift
+        )
+    else:
+        pipe.scheduler = FlowMatchEulerDiscreteScheduler(shift=flow_shift)
+
+    # Move to GPU
+    pipe.to("cuda")
+
+    # Load LoRA weights if provided
+    if lora_id and lora_id.strip():
+        pipe.load_lora_weights(lora_id)
+
+    # Enable CPU offload for low VRAM
+    pipe.enable_model_cpu_offload()
+
+    # Generate video
+    output = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=height,
+        width=width,
+        num_frames=num_frames,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps
+    ).frames[0]
+
+    # Export to video
+    temp_file = "output.mp4"
+    export_to_video(output, temp_file, fps=output_fps)
+
+    return temp_file
 
 # Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Wan Video Generation with ZeroGPU")
-    gr.Markdown("Generate high-quality videos using the Wan model with optional LoRA adaptations.")
 
     with gr.Row():
         with gr.Column(scale=1):
@@ -309,7 +208,6 @@ with gr.Blocks() as demo:
     - For larger resolution videos, try higher values of flow shift (7.0-12.0)
     - Number of frames should be of the form 4k+1 (e.g., 49, 81, 65)
    - The model is memory intensive, so adjust resolution according to available VRAM
-    - LoRA ID should be a Hugging Face repository containing safetensors files
     """)
 
 demo.launch()
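Note: the commit replaces the runtime pip-install and LoRA fallback logic with a single direct `pipe.load_lora_weights(lora_id)` call. A minimal smoke test of that simplified path, runnable outside Gradio, might look like the sketch below. The `model_id` and `lora_id` values are illustrative assumptions, since the actual `MODEL_OPTIONS` entries are not shown in this diff.

```python
# Sketch: smoke-test the simplified LoRA load path (repo IDs below are
# illustrative assumptions, not taken from this commit).
import torch
from diffusers import AutoencoderKLWan, WanPipeline

model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"  # assumed MODEL_OPTIONS entry
lora_id = "your-username/your-wan-lora"        # hypothetical LoRA repo

vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)

# load_lora_weights is the diffusers/PEFT integration point this commit
# is debugging; it needs a compatible peft install (pinned in requirements.txt).
pipe.load_lora_weights(lora_id)
print("LoRA weights loaded without error")
```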
requirements.txt CHANGED
@@ -7,7 +7,4 @@ ftfy>=6.1.3
 einops>=0.7.0
 imageio>=2.31.6
 imageio-ffmpeg>=0.4.9
-opencv-python>=4.9.0.0
-omegaconf>=2.3.0
-peft==0.7.1
-bitsandbytes>=0.41.0
+peft==0.7.1
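With the runtime pip installs removed from app.py, the pinned `peft==0.7.1` now has to come from this file alone. A quick sanity check (a sketch) that the Space resolves the expected version:

```python
# Sketch: verify the pinned PEFT version is what actually got installed.
import peft

assert peft.__version__ == "0.7.1", f"unexpected peft version: {peft.__version__}"
```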