Update app.py
app.py CHANGED
@@ -4,30 +4,36 @@ from diffusers import AutoencoderKLWan, WanPipeline
 from diffusers.utils import export_to_video
 import spaces  # ZeroGPU integration
 
-PIPELINE = None
-
+def load_pipeline_on_cpu():
+    model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+    print("Preloading model on CPU...")
+    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+    pipeline_cpu = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+    # Remain on CPU for now
+    print("Model preloaded on CPU.")
+    return pipeline_cpu
 
-# Load lazily inside the GPU worker process
-def get_pipeline():
-    global PIPELINE
-    if PIPELINE is None:
-        model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
-        print("Loading model in worker process (moving to GPU)...")
-        vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
-        PIPELINE = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
-        PIPELINE.to("cuda")
-        print("Model loaded on GPU.")
-    return PIPELINE
+# Load the model on CPU during container initialization.
+PIPELINE_CPU = load_pipeline_on_cpu()
+PIPELINE_GPU = None  # Will hold the GPU-loaded pipeline after the first request
 
-@spaces.GPU
+@spaces.GPU  # This ensures GPU is only initialized in the request worker process
 def generate_video(prompt, negative_prompt=""):
-    pipe = get_pipeline()
-    output = pipe(
+    global PIPELINE_GPU
+    # Move to GPU on first request if not already done.
+    if PIPELINE_GPU is None:
+        print("Moving model to GPU...")
+        PIPELINE_GPU = PIPELINE_CPU.to("cuda")
+        print("Model moved to GPU.")
+    pipeline_gpu = PIPELINE_GPU
+
+    # Generate video frames at 480p resolution
+    output = pipeline_gpu(
         prompt=prompt,
         negative_prompt=negative_prompt,
         height=480,  # 480p height
         width=832,  # Suitable width for 480p videos
+        num_frames=81,  # Adjust for desired video length
-        num_frames=81,  # Adjust
         guidance_scale=5.0  # Recommended guidance scale for the 1.3B model
     ).frames[0]
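Why this change: on ZeroGPU Spaces, @spaces.GPU only attaches a GPU to the request worker, so the previous version re-ran the full from_pretrained download and deserialization inside the first request. Preloading on CPU at container start leaves only the one-time .to("cuda") transfer on the request path.

The hunk ends at .frames[0], so the rest of generate_video and the app wiring are not shown. Below is a minimal end-to-end sketch of the pattern, assuming the usual diffusers + Gradio setup; the torch/gradio imports, output path, fps value, and gr.Interface wiring are illustrative guesses, not part of this commit.

import torch
import gradio as gr  # assumed UI layer for the Space
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video
import spaces  # ZeroGPU integration

def load_pipeline_on_cpu():
    # Build the pipeline without touching CUDA; safe in the main process.
    model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae",
                                           torch_dtype=torch.float32)
    return WanPipeline.from_pretrained(model_id, vae=vae,
                                       torch_dtype=torch.bfloat16)

PIPELINE_CPU = load_pipeline_on_cpu()  # runs once at container start
PIPELINE_GPU = None                    # filled in by the first request

@spaces.GPU  # GPU is attached only while this handler runs
def generate_video(prompt, negative_prompt=""):
    global PIPELINE_GPU
    if PIPELINE_GPU is None:
        PIPELINE_GPU = PIPELINE_CPU.to("cuda")  # one-time host-to-device copy
    frames = PIPELINE_GPU(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=480,
        width=832,
        num_frames=81,
        guidance_scale=5.0,
    ).frames[0]
    video_path = "output.mp4"                    # assumed output location
    export_to_video(frames, video_path, fps=15)  # assumed frame rate
    return video_path

demo = gr.Interface(
    fn=generate_video,
    inputs=[gr.Textbox(label="Prompt"), gr.Textbox(label="Negative prompt")],
    outputs=gr.Video(label="Generated video"),
)

if __name__ == "__main__":
    demo.launch()

One caveat: ZeroGPU may tear down worker state between requests, so treating PIPELINE_GPU as a warm cache is best-effort; the if PIPELINE_GPU is None guard keeps the code correct either way.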