awacke1 committed
Commit 90d8457 · verified · 1 Parent(s): 754171c

Update app.py

Files changed (1)
  1. app.py +62 -27
app.py CHANGED
@@ -11,12 +11,27 @@ from diffusers.utils import load_image, export_to_video
 from PIL import Image
 from huggingface_hub import hf_hub_download

+# ------------------------------------------------------------------------
+# FIX: Adapt to the available hardware (GPU or CPU)
+# ------------------------------------------------------------------------
+
+# Automatically detect the device and select the appropriate data type.
+# This makes the code runnable on machines with or without a dedicated NVIDIA GPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if device == "cuda" else torch.float32
+
+# Load the pipeline onto the detected device.
 pipe = StableVideoDiffusionPipeline.from_pretrained(
-    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
+    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch_dtype, variant="fp16"
 )
-pipe.to("cuda")
-pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+pipe.to(device)
+
+# Apply torch.compile for optimization only if on a GPU, as it's most effective there.
+if device == "cuda":
+    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+
+# ------------------------------------------------------------------------
+
 max_64_bit_int = 2**63 - 1

 # Function to sample video from the input image
@@ -29,7 +44,6 @@ def sample(
     version: str = "svd_xt",
     cond_aug: float = 0.02,
     decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
-    device: str = "cuda",
     output_folder: str = "outputs",
 ):
     if image.mode == "RGBA":
@@ -42,20 +56,30 @@ def sample(
     os.makedirs(output_folder, exist_ok=True)
     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
-    frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
+
+    frames = pipe(
+        image,
+        decode_chunk_size=decoding_t,
+        generator=generator,
+        motion_bucket_id=motion_bucket_id,
+        noise_aug_strength=0.1,
+        num_frames=25
+    ).frames[0]
+
     export_to_video(frames, video_path, fps=fps_id)
     torch.manual_seed(seed)
     return video_path, seed

-# Function to resize the uploaded image
+# Function to resize the uploaded image to the model's optimal input size
 def resize_image(image, output_size=(1024, 576)):
+    # Resizes and crops the image to a 16:9 aspect ratio.
     target_aspect = output_size[0] / output_size[1]
     image_aspect = image.width / image.height

     if image_aspect > target_aspect:
         new_height = output_size[1]
         new_width = int(new_height * image_aspect)
-        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
         left = (new_width - output_size[0]) / 2
         top = 0
         right = (new_width + output_size[0]) / 2
@@ -63,7 +87,7 @@ def resize_image(image, output_size=(1024, 576)):
     else:
         new_width = output_size[0]
         new_height = int(new_width / image_aspect)
-        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+        resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
         left = 0
         top = (new_height - output_size[1]) / 2
         right = output_size[0]
@@ -75,39 +99,50 @@ def resize_image(image, output_size=(1024, 576)):
 # Dynamically load image files from the 'images' directory
 def get_example_images():
     image_dir = "images/"
+    if not os.path.exists(image_dir):
+        os.makedirs(image_dir)
     image_files = glob(os.path.join(image_dir, "*.png")) + glob(os.path.join(image_dir, "*.jpg"))
     return image_files

 # Gradio interface setup
 with gr.Blocks() as demo:
-    gr.Markdown('''# Stable Video Diffusion using Image 2 Video XT
-    #### Research release: generate `4s` vid from a single image at (`25 frames` at `6 fps`).''')
+    gr.Markdown('''# Stable Video Diffusion
+    #### Generate short videos from a single image.''')

     with gr.Row():
         with gr.Column():
-            image = gr.Image(label="Upload your image", type="pil")
-            generate_btn = gr.Button("Generate")
-        video = gr.Video()
+            image = gr.Image(label="Upload Your Image", type="pil")
+            generate_btn = gr.Button("Generate Video", variant="primary")
+        video = gr.Video(label="Generated Video")

-    with gr.Accordion("Advanced options", open=False):
-        seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-        motion_bucket_id = gr.Slider(label="Motion bucket id", value=127, minimum=1, maximum=255)
-        fps_id = gr.Slider(label="Frames per second", value=6, minimum=5, maximum=30)
+    with gr.Accordion("Advanced Options", open=False):
+        seed = gr.Slider(label="Seed", value=42, minimum=0, maximum=max_64_bit_int, step=1)
+        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+        motion_bucket_id = gr.Slider(label="Motion Bucket ID", info="Controls the amount of motion in the video.", value=127, minimum=1, maximum=255)
+        fps_id = gr.Slider(label="Frames Per Second (FPS)", info="Adjusts the playback speed of the video.", value=7, minimum=5, maximum=30)

+    # When a new image is uploaded, process it immediately
     image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
-    generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
+
+    # When the generate button is clicked, run the sampling function
+    generate_btn.click(
+        fn=sample,
+        inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id],
+        outputs=[video, seed],
+        api_name="video"
+    )

     # Dynamically load examples from the filesystem
     example_images = get_example_images()
-    gr.Examples(
-        examples=example_images,
-        inputs=image,
-        outputs=[video, seed],
-        fn=sample,
-        cache_examples=True,
-    )
+    if example_images:
+        gr.Examples(
+            examples=example_images,
+            inputs=image,
+            outputs=[video, seed],
+            fn=lambda img: sample(resize_image(Image.open(img))),  # Resize example images before sampling
+            cache_examples=True,
+        )

 if __name__ == "__main__":
     demo.queue(max_size=20)
-    demo.launch(share=True)
+    demo.launch(share=True)
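
Because the rewritten generate_btn.click(...) call keeps api_name="video", the running app also exposes a callable /video endpoint. Below is a minimal sketch (not part of the commit) of driving it with gradio_client; the Space id is a placeholder, the positional argument order is assumed to mirror the inputs list [image, seed, randomize_seed, motion_bucket_id, fps_id] from the diff, and handle_file requires a recent gradio_client release (older versions accepted plain file paths).

    # Sketch only: call the "video" endpoint of the deployed app programmatically.
    from gradio_client import Client, handle_file

    client = Client("awacke1/<space-name>")  # placeholder Space id or URL
    result = client.predict(
        handle_file("input.png"),  # image
        42,                        # seed
        True,                      # randomize_seed
        127,                       # motion_bucket_id
        7,                         # fps_id
        api_name="/video",
    )
    print(result)  # (generated video file, seed); exact return types depend on the Gradio version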