Create app_2.py
app_2.py
ADDED
@@ -0,0 +1,66 @@
import torch
from diffusers import (
    AnimateDiffControlNetPipeline, AutoencoderKL,
    ControlNetModel, MotionAdapter, LCMScheduler
)
from diffusers.utils import export_to_gif, load_video
from controlnet_aux import MidasDetector  # Faster than ZoeDetector

# Load depth-based ControlNet (in diffusers format)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)

# Load AnimateDiff Motion Adapter (AnimateLCM)
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")

# Load VAE for SD 1.5
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

# Load AnimateDiff pipeline with ControlNet
pipe = AnimateDiffControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
).to(device="cuda", dtype=torch.float16)

# Use LCM scheduler (optimized for AnimateLCM)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Load AnimateLCM LoRA
pipe.load_lora_weights(
    "wangfuyun/AnimateLCM",
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora"
)
pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])

# Use MiDaS for depth extraction (faster)
depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")

# Load input video for depth-based conditioning
video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif")
conditioning_frames = []

# Process video frames into depth maps
for frame in video:
    conditioning_frames.append(depth_detector(frame))

# Define prompts
prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
negative_prompt = "blurry, deformed, distorted, bad quality"

# Generate animated output
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=len(video),
    num_inference_steps=10,
    guidance_scale=2.0,
    conditioning_frames=conditioning_frames,
    generator=torch.manual_seed(42),
).frames[0]

# Save animation as GIF
export_to_gif(output, "animatediff_controlnet.gif", fps=8)
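If the Space runs on a GPU with limited VRAM, the same pipe object can optionally trade some speed for memory. A minimal sketch using standard diffusers calls; this is not part of the committed file, and with CPU offload enabled the earlier .to(device="cuda", ...) call would normally be dropped:

# Optional memory savings (assumed add-on, same pipeline object as above)
pipe.enable_vae_slicing()        # decode latent frames in slices instead of all at once
pipe.enable_model_cpu_offload()  # keep submodules on CPU, moving each to GPU only while it runs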