K00B404 committed
Commit a623d98 (verified) · Parent: 3b59f84

Create app_2.py

Files changed (1):
  1. app_2.py (+66, -0)
app_2.py ADDED
@@ -0,0 +1,66 @@
+ import torch
+ from diffusers import (
+     AnimateDiffControlNetPipeline, AutoencoderKL,
+     ControlNetModel, MotionAdapter, LCMScheduler
+ )
+ from diffusers.utils import export_to_gif, load_video
+ from controlnet_aux import MidasDetector  # Faster than ZoeDetector
+
+ # Load depth-based ControlNet (in diffusers format)
+ controlnet = ControlNetModel.from_pretrained(
+     "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
+ )
+
+ # Load AnimateDiff motion adapter (AnimateLCM)
+ motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")
+
+ # Load VAE for SD 1.5
+ vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
+
+ # Load AnimateDiff pipeline with ControlNet
+ pipe = AnimateDiffControlNetPipeline.from_pretrained(
+     "SG161222/Realistic_Vision_V5.1_noVAE",
+     motion_adapter=motion_adapter,
+     controlnet=controlnet,
+     vae=vae,
+ ).to(device="cuda", dtype=torch.float16)
+
+ # Use LCM scheduler with the linear beta schedule recommended for AnimateLCM
+ pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")
+
+ # Load AnimateLCM LoRA
+ pipe.load_lora_weights(
+     "wangfuyun/AnimateLCM",
+     weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
+     adapter_name="lcm-lora"
+ )
+ pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])
+
+ # Use MiDaS for depth extraction (faster than ZoeDepth)
+ depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")
+
+ # Load input video for depth-based conditioning
+ video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif")
+ conditioning_frames = []
+
+ # Process video frames into depth maps
+ for frame in video:
+     conditioning_frames.append(depth_detector(frame))
+
+ # Define prompts
+ prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
+ negative_prompt = "blurry, deformed, distorted, bad quality"
+
+ # Generate animated output
+ output = pipe(
+     prompt=prompt,
+     negative_prompt=negative_prompt,
+     num_frames=len(video),
+     num_inference_steps=10,
+     guidance_scale=2.0,
+     conditioning_frames=conditioning_frames,
+     generator=torch.manual_seed(42),
+ ).frames[0]
+
+ # Save animation as GIF
+ export_to_gif(output, "animatediff_controlnet.gif", fps=8)
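
A possible follow-up, not part of this commit: on GPUs where the full fp16 pipeline does not fit, diffusers' standard memory helpers can be enabled before inference. Both `enable_vae_slicing` and `enable_model_cpu_offload` are existing pipeline methods; whether they are worthwhile depends on available VRAM, and `enable_model_cpu_offload` manages device placement itself, so it would replace the explicit `.to(device="cuda", ...)` move above.

    # Optional memory savers (sketch, not in this commit); call before pipe(...)
    pipe.enable_vae_slicing()        # decode generated frames through the VAE in slices
    pipe.enable_model_cpu_offload()  # keep idle submodules on CPU, moving each to GPU on demand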
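Equally optional: to write an MP4 instead of a GIF, `diffusers.utils.export_to_video` is a drop-in alternative to `export_to_gif`; the output filename here is only illustrative.

    from diffusers.utils import export_to_video

    # Sketch: write the same frames as an MP4 (filename is illustrative)
    export_to_video(output, "animatediff_controlnet.mp4", fps=8)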