Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Create app_2.py
Browse files
    	
        app_2.py
    ADDED
    
    | @@ -0,0 +1,66 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
"""Generate a depth-conditioned AnimateDiff animation and save it as a GIF.

The script loads a depth ControlNet, the AnimateLCM motion adapter and LoRA,
and an SD 1.5 checkpoint, extracts one depth map per frame of a reference GIF
with MiDaS, and uses those maps as ControlNet conditioning frames.
"""
import torch
from diffusers import (
    AnimateDiffControlNetPipeline, AutoencoderKL,
    ControlNetModel, MotionAdapter, LCMScheduler
)
from diffusers.utils import export_to_gif, load_video
from controlnet_aux import MidasDetector  # Faster than ZoeDetector

# Load depth-based ControlNet (in diffusers format)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
)

# Load AnimateDiff Motion Adapter (AnimateLCM)
motion_adapter = MotionAdapter.from_pretrained("wangfuyun/AnimateLCM")

# Load VAE for SD 1.5
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)

# Load AnimateDiff pipeline with ControlNet; the final .to() moves every
# component to the GPU and casts it to float16.
pipe = AnimateDiffControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
).to(device="cuda", dtype=torch.float16)

# Use LCM Scheduler (optimized for AnimateLCM). The diffusers AnimateLCM
# examples override the base checkpoint's beta schedule with "linear".
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")

# Load AnimateLCM LoRA
pipe.load_lora_weights(
    "wangfuyun/AnimateLCM",
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora",
)
pipe.set_adapters(["lcm-lora"], adapter_scales=[0.8])

# Use MiDaS for depth extraction (faster)
depth_detector = MidasDetector.from_pretrained("lllyasviel/Annotators").to("cuda")

# Load input video for depth-based conditioning
video = load_video("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-vid2vid-input-1.gif")
# GIF frames may be decoded in palette mode; convert to RGB so the depth
# detector always receives 3-channel images.
video = [frame.convert("RGB") for frame in video]

# Process video frames into depth maps (one conditioning frame per input frame)
conditioning_frames = [depth_detector(frame) for frame in video]

# Define prompts
prompt = "a panda, playing a guitar, sitting in a pink boat, in the ocean, mountains in background, realistic, high quality"
negative_prompt = "blurry, deformed, distorted, bad quality"

# Generate animated output; num_frames matches the number of conditioning
# frames, and the fixed seed makes the run repeatable.
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=len(video),
    num_inference_steps=10,
    guidance_scale=2.0,
    conditioning_frames=conditioning_frames,
    generator=torch.manual_seed(42),
).frames[0]

# Save animation as GIF
export_to_gif(output, "animatediff_controlnet.gif", fps=8)
 
			
