Nymbo committed
Commit 26ed6a6 · verified · 1 parent: c085c80

Create app.py

Files changed (1): app.py (+466, -0)
app.py ADDED
import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import tempfile
import uuid

# Access token from environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "No HF_TOKEN found; only BYOK requests will authenticate.")

def generate_video(
    prompt,
    negative_prompt,
    num_frames,
    fps,
    width,
    height,
    num_inference_steps,
    guidance_scale,
    motion_bucket_id,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model
):
    """Generate a video based on the provided parameters."""
    print(f"Received prompt: {prompt}")
    print(f"Negative prompt: {negative_prompt}")
    print(f"Num frames: {num_frames}, FPS: {fps}")
    print(f"Width: {width}, Height: {height}")
    print(f"Steps: {num_inference_steps}, Guidance Scale: {guidance_scale}")
    print(f"Motion Bucket ID: {motion_bucket_id}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Determine which token to use: the custom API key if provided, otherwise ACCESS_TOKEN
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    # Log which token source we're using (without printing the actual token)
    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the provider and appropriate token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")
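
    # Note: routing requests to third-party providers via
    # InferenceClient(provider=...) requires a reasonably recent
    # huggingface_hub release; older versions only support hf-inference.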

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None
    else:
        # Ensure seed is an integer
        seed = int(seed)

    # Determine which model to use, prioritizing custom_model if provided
    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
    print(f"Model selected for inference: {model_to_use}")

    # Create a unique ID for this generation (used only for logging)
    generation_id = uuid.uuid4().hex[:8]
    print(f"Generation ID: {generation_id}")

    # Prepare parameters for the video generation request
    # Note: different providers may have different parameter requirements
    parameters = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "fps": fps,
        "width": width,
        "height": height,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
    }

    # Add motion_bucket_id if applicable (depends on the model)
    if motion_bucket_id is not None:
        parameters["motion_bucket_id"] = motion_bucket_id

    # Add seed if specified
    if seed is not None:
        parameters["seed"] = seed
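
    # With the UI defaults, this payload looks roughly like:
    #   {"prompt": "A beautiful sunset over a calm ocean",
    #    "negative_prompt": "poor quality, ...", "num_frames": 16, "fps": 8,
    #    "width": 512, "height": 512, "num_inference_steps": 25,
    #    "guidance_scale": 7.5, "motion_bucket_id": 127}
    # (which fields a given model/provider actually accepts varies)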

    # For the FalAI provider - may need specific formatting
    if provider == "fal-ai":
        print("Using FalAI provider, adapting parameters...")
        # FalAI might use different parameter formats or additional settings;
        # note that fps and motion_bucket_id are not included in this payload
        parameters = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "num_frames": num_frames,
            "seed": seed if seed is not None else -1,
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
        }

    # For the Novita provider - may need specific formatting
    if provider == "novita":
        print("Using Novita provider, adapting parameters...")
        # Based on the documentation, Novita uses the text_to_video method
        try:
            # Note: depending on the huggingface_hub version, fields such as
            # fps, width, and height may not be accepted as keyword arguments
            # here and may need to be passed via extra_body instead
            video_data = client.text_to_video(
                prompt=prompt,
                model=model_to_use,
                negative_prompt=negative_prompt,
                num_frames=num_frames,
                fps=fps,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                seed=seed
            )

            # Save the returned video bytes to a temporary file
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
            temp_file.write(video_data)
            video_path = temp_file.name
            temp_file.close()

            print(f"Video saved to temporary file: {video_path}")
            return video_path

        except Exception as e:
            print(f"Error during Novita video generation: {e}")
            return f"Error: {str(e)}"

    # For the Replicate provider - may need specific formatting
    if provider == "replicate":
        print("Using Replicate provider, adapting parameters...")
        try:
            # client.post() sends a raw JSON payload; Replicate-style APIs
            # expect the generation parameters wrapped in an "input" object.
            # (post() exists only in huggingface_hub versions that still
            # expose the raw endpoint; newer releases removed it.)
            response = client.post(
                model=model_to_use,
                json={
                    "input": {
                        "prompt": prompt,
                        "negative_prompt": negative_prompt,
                        "num_frames": num_frames,
                        "fps": fps,
                        "width": width,
                        "height": height,
                        "num_inference_steps": num_inference_steps,
                        "guidance_scale": guidance_scale,
                        "seed": seed if seed is not None else 0,
                    }
                },
            )

            # The raw response is bytes; decode it before inspecting
            result = json.loads(response)

            # Replicate typically returns a URL to the generated video
            if isinstance(result, dict) and "output" in result:
                video_url = result["output"]
                print(f"Video generated, URL: {video_url}")
                return video_url
            else:
                return str(result)

        except Exception as e:
            print(f"Error during Replicate video generation: {e}")
            return f"Error: {str(e)}"

    # General approach for other providers (hf-inference and fal-ai reach here)
    try:
        print(f"Sending request to {provider} provider with model {model_to_use}.")
        print(f"Parameters: {parameters}")

        # "prompt" is passed explicitly to text_to_video below, so drop it
        # from the dict to avoid a duplicate keyword argument
        parameters.pop("prompt", None)

        # Use the text_to_video method of the InferenceClient
        video_data = client.text_to_video(
            prompt=prompt,
            model=model_to_use,
            **parameters
        )

        # Save the video to a temporary file
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        temp_file.write(video_data)
        video_path = temp_file.name
        temp_file.close()

        print(f"Video saved to temporary file: {video_path}")
        return video_path

    except Exception as e:
        print(f"Error during video generation: {e}")
        return f"Error: {str(e)}"

# Function to validate provider selection based on BYOK
def validate_provider(api_key, provider):
    # If no custom API key is provided, only "hf-inference" can be used
    if not api_key.strip() and provider != "hf-inference":
        return gr.update(value="hf-inference")
    return gr.update(value=provider)
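
# Example behavior (a sketch of the intended update semantics):
#   validate_provider("", "fal-ai")        -> radio resets to "hf-inference"
#   validate_provider("my-key", "fal-ai")  -> "fal-ai" stays selected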

# Define the Gradio UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Set a title for the application
    gr.Markdown("# 🎬 Serverless-VideoGen-Hub")
    gr.Markdown("Generate videos using Hugging Face Serverless Inference")

    with gr.Row():
        with gr.Column(scale=2):
            # Main video output area
            video_output = gr.Video(label="Generated Video", height=400)

            # Basic input components
            prompt_box = gr.Textbox(
                value="A beautiful sunset over a calm ocean",
                placeholder="Enter a prompt for your video",
                label="Prompt",
                lines=3
            )

            # Generate button
            generate_button = gr.Button("🎬 Generate Video", variant="primary")

        with gr.Column(scale=1):
            # Model selection components
            model_search_box = gr.Textbox(
                label="Filter Models",
                placeholder="Search for a model...",
                lines=1
            )

            models_list = [
                "stabilityai/stable-video-diffusion-img2vid-xt",
                "stabilityai/stable-video-diffusion-img2vid",
                "damo-vilab/text-to-video-ms-1.7b",
                "tencent/HunyuanVideo",
                "Wan-AI/Wan2.1-T2V-14B",
                "PixArt-alpha/PixArt-sigma-vid",
                "strangerbytesxyz/motion-animator-diffusion-video"
            ]

            featured_model_radio = gr.Radio(
                label="Select a model below",
                choices=models_list,
                value="stabilityai/stable-video-diffusion-img2vid",
                interactive=True
            )

            custom_model_box = gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
                placeholder="damo-vilab/text-to-video-ms-1.7b"
            )

    # Advanced settings in an accordion
    with gr.Accordion("Advanced Settings", open=False):
        with gr.Row():
            with gr.Column():
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="What should NOT be in the video",
                    value="poor quality, distortion, blurry, low resolution, grainy",
                    lines=2
                )

                with gr.Row():
                    width = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Width"
                    )

                    height = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Height"
                    )

                with gr.Row():
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=64,
                        value=16,
                        step=1,
                        label="Number of Frames"
                    )

                    fps = gr.Slider(
                        minimum=1,
                        maximum=30,
                        value=8,
                        step=1,
                        label="Frames Per Second"
                    )

            with gr.Column():
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=25,
                        step=1,
                        label="Inference Steps"
                    )

                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5,
                        label="Guidance Scale"
                    )

                with gr.Row():
                    motion_bucket_id = gr.Slider(
                        minimum=1,
                        maximum=255,
                        value=127,
                        step=1,
                        label="Motion Bucket ID (for SVD models)"
                    )

                    seed = gr.Slider(
                        minimum=-1,
                        maximum=2147483647,
                        value=-1,
                        step=1,
                        label="Seed (-1 for random)"
                    )

        # Provider selection
        providers_list = [
            "hf-inference",  # Default Hugging Face Inference
            "fal-ai",        # Fal AI provider
            "novita",        # Novita provider
            "replicate",     # Replicate provider
        ]

        provider_radio = gr.Radio(
            choices=providers_list,
            value="hf-inference",
            label="Inference Provider",
            info="Select an inference provider. All providers except hf-inference require your own API key."
        )

        # BYOK textbox
        byok_textbox = gr.Textbox(
            value="",
            label="BYOK (Bring Your Own Key)",
            info="Enter a provider API key here. When empty, only the 'hf-inference' provider can be used.",
            placeholder="Enter your provider API token",
            type="password"  # Hide the API key for security
        )

    # Set up the generation click event
    generate_button.click(
        fn=generate_video,
        inputs=[
            prompt_box,
            negative_prompt,
            num_frames,
            fps,
            width,
            height,
            num_inference_steps,
            guidance_scale,
            motion_bucket_id,
            seed,
            provider_radio,
            byok_textbox,
            custom_model_box,
            model_search_box,
            featured_model_radio
        ],
        outputs=video_output
    )
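
    # Note: the order of the inputs list above must match the parameter order
    # of generate_video() exactly; Gradio passes the values positionally.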

    # Connect the model filter to update the radio choices
    def filter_models(search_term):
        print(f"Filtering models with search term: {search_term}")
        filtered = [m for m in models_list if search_term.lower() in m.lower()]
        print(f"Filtered models: {filtered}")
        return gr.update(choices=filtered)

    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio
    )
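
    # Example: typing "stable" narrows the radio to the two
    # stabilityai/stable-video-diffusion entries; clearing the box restores all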

    # Connect the featured model radio to update the custom model box
    def set_custom_model_from_radio(selected):
        """
        Triggered whenever a model is picked from the 'Featured Models' radio;
        mirrors the selection into the Custom Model textbox.
        """
        print(f"Featured model selected: {selected}")
        return selected

    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box
    )

    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

    # Also validate provider when the radio changes, to ensure consistency
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

    # Information accordion
    with gr.Accordion("Information & Help", open=False):
        gr.Markdown("""
        # 🎬 Serverless-VideoGen-Hub

        This application uses Hugging Face's Serverless Inference API to generate videos from text prompts.

        ## Supported Providers

        - **hf-inference**: Hugging Face's default inference API (free)
        - **fal-ai**: Fal AI provider (requires API key)
        - **novita**: Novita AI provider (requires API key)
        - **replicate**: Replicate provider (requires API key)

        ## Parameters Explained

        - **Prompt**: The text description of your desired video
        - **Negative Prompt**: What you DON'T want to see in the video
        - **Width/Height**: Dimensions of the generated video
        - **Number of Frames**: Total frames to generate
        - **FPS**: Frames per second for playback
        - **Inference Steps**: More steps = higher quality but slower generation
        - **Guidance Scale**: How closely to follow the prompt (higher values = more faithful)
        - **Motion Bucket ID**: Controls motion intensity (for Stable Video Diffusion models)
        - **Seed**: For reproducible results; -1 means random

        ## Models

        You can either select from the featured models or enter a custom model path.

        Check out [Hugging Face's models page](https://huggingface.co/models?pipeline_tag=text-to-video) for more video generation models.
        """)

# Launch the app
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=True)
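
# Because demo.launch(show_api=True) exposes the Gradio API, the app can also
# be driven programmatically. A minimal sketch (assuming the Space is reachable
# at the hypothetical URL below, that gradio_client is installed, and that the
# click handler is exposed under the assumed endpoint name "/generate_video"):
#
#   from gradio_client import Client
#   client = Client("https://example-space-url")      # hypothetical URL
#   result = client.predict(
#       "A beautiful sunset over a calm ocean",       # prompt
#       "poor quality, blurry",                       # negative_prompt
#       16, 8, 512, 512, 25, 7.5, 127, -1,            # frames, fps, w, h, steps, cfg, motion, seed
#       "hf-inference", "",                           # provider, BYOK key
#       "", "",                                       # custom model, search term
#       "stabilityai/stable-video-diffusion-img2vid", # featured model
#       api_name="/generate_video"
#   )
#   print(result)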