import gradio as gr
from huggingface_hub import InferenceClient
import os
import tempfile
import uuid

# Access token from the HF_TOKEN environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "Warning: HF_TOKEN is not set.")
def generate_video(
    prompt,
    negative_prompt,
    num_frames,
    fps,
    width,
    height,
    num_inference_steps,
    guidance_scale,
    motion_bucket_id,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model,
):
    """Generate a video based on the provided parameters."""
    print(f"Received prompt: {prompt}")
    print(f"Negative prompt: {negative_prompt}")
    print(f"Num frames: {num_frames}, FPS: {fps}")
    print(f"Width: {width}, Height: {height}")
    print(f"Steps: {num_inference_steps}, Guidance Scale: {guidance_scale}")
    print(f"Motion Bucket ID: {motion_bucket_id}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Use the custom API key if provided, otherwise fall back to ACCESS_TOKEN
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    # Log which token source is in use (without printing the actual token)
    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the selected provider and token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")

    # A seed of -1 means "random"; otherwise make sure it is an integer
    if seed == -1:
        seed = None
    else:
        seed = int(seed)

    # Determine which model to use, prioritizing custom_model if provided
    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
    print(f"Model selected for inference: {model_to_use}")

    # Create a short unique ID for this generation (used only for log correlation)
    generation_id = uuid.uuid4().hex[:8]
    print(f"Generation ID: {generation_id}")
    # Prepare parameters for the video generation request.
    # Note: different providers may have different parameter requirements.
    # prompt is deliberately kept out of this dict, since it is passed
    # positionally below; including it here would raise a duplicate-argument error.
    parameters = {
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
    }
    # Values that are not first-class text_to_video arguments are forwarded via
    # extra_body; whether a given provider/model honors them varies.
    extra_body = {
        "fps": fps,
        "width": width,
        "height": height,
    }
    # Add motion_bucket_id if applicable (only meaningful for SVD-style models)
    if motion_bucket_id is not None:
        extra_body["motion_bucket_id"] = motion_bucket_id
    # Add seed if specified
    if seed is not None:
        parameters["seed"] = seed

    # The fal-ai provider may use different parameter formats: drop fps and
    # motion_bucket_id, and send an explicit -1 for a random seed.
    if provider == "fal-ai":
        print("Using fal-ai provider, adapting parameters...")
        extra_body.pop("fps", None)
        extra_body.pop("motion_bucket_id", None)
        parameters["seed"] = seed if seed is not None else -1
    # The Novita provider is served through the same text_to_video task method;
    # call it directly with explicit arguments.
    if provider == "novita":
        print("Using Novita provider, adapting parameters...")
        try:
            video_data = client.text_to_video(
                prompt,
                model=model_to_use,
                negative_prompt=negative_prompt,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                seed=seed,
                extra_body=extra_body,
            )
            # Save the raw video bytes to a temporary file for Gradio to serve
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
                temp_file.write(video_data)
                video_path = temp_file.name
            print(f"Video saved to temporary file: {video_path}")
            return video_path
        except Exception as e:
            print(f"Error during Novita video generation: {e}")
            return f"Error: {str(e)}"
    # Replicate is also routed through text_to_video. (Older huggingface_hub
    # releases exposed a low-level client.post() helper for this, but it has
    # since been removed, so the task method is used here as well.)
    if provider == "replicate":
        print("Using Replicate provider, adapting parameters...")
        try:
            video_data = client.text_to_video(
                prompt,
                model=model_to_use,
                negative_prompt=negative_prompt,
                num_frames=num_frames,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                seed=seed if seed is not None else 0,
                extra_body=extra_body,
            )
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
                temp_file.write(video_data)
                video_path = temp_file.name
            print(f"Video saved to temporary file: {video_path}")
            return video_path
        except Exception as e:
            print(f"Error during Replicate video generation: {e}")
            return f"Error: {str(e)}"
    # General approach for the remaining providers (hf-inference, fal-ai)
    try:
        print(f"Sending request to {provider} provider with model {model_to_use}.")
        print(f"Parameters: {parameters} | extra_body: {extra_body}")

        video_data = client.text_to_video(
            prompt,
            model=model_to_use,
            extra_body=extra_body,
            **parameters,
        )

        # Save the video bytes to a temporary file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
            temp_file.write(video_data)
            video_path = temp_file.name
        print(f"Video saved to temporary file: {video_path}")
        return video_path
    except Exception as e:
        print(f"Error during video generation: {e}")
        return f"Error: {str(e)}"

# Validate provider selection based on whether a BYOK key is present
def validate_provider(api_key, provider):
    # Without a custom API key, only "hf-inference" can be used
    if not api_key.strip() and provider != "hf-inference":
        return gr.update(value="hf-inference")
    return gr.update(value=provider)
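
# Illustrative behavior (the key values are made up):
#   validate_provider("", "fal-ai")          -> radio reset to "hf-inference"
#   validate_provider("sk-abc123", "fal-ai") -> "fal-ai" kept as selected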

# Define the Gradio UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Application title
    gr.Markdown("# 🎬 Serverless-VideoGen-Hub")
    gr.Markdown("Generate videos using Hugging Face Serverless Inference")

    with gr.Row():
        with gr.Column(scale=2):
            # Main video output area
            video_output = gr.Video(label="Generated Video", height=400)

            # Basic input components
            prompt_box = gr.Textbox(
                value="A beautiful sunset over a calm ocean",
                placeholder="Enter a prompt for your video",
                label="Prompt",
                lines=3,
            )

            # Generate button
            generate_button = gr.Button("🎬 Generate Video", variant="primary")

        with gr.Column(scale=1):
            # Model selection components
            model_search_box = gr.Textbox(
                label="Filter Models",
                placeholder="Search for a model...",
                lines=1,
            )

            models_list = [
                "stabilityai/stable-video-diffusion-img2vid-xt",
                "stabilityai/stable-video-diffusion-img2vid",
                "damo-vilab/text-to-video-ms-1.7b",
                "tencent/HunyuanVideo",
                "Wan-AI/Wan2.1-T2V-14B",
                "PixArt-alpha/PixArt-sigma-vid",
                "strangerbytesxyz/motion-animator-diffusion-video",
            ]

            featured_model_radio = gr.Radio(
                label="Select a model below",
                choices=models_list,
                value="stabilityai/stable-video-diffusion-img2vid",
                interactive=True,
            )

            custom_model_box = gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
                placeholder="damo-vilab/text-to-video-ms-1.7b",
            )

    # Advanced settings in an accordion
    with gr.Accordion("Advanced Settings", open=False):
        with gr.Row():
            with gr.Column():
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="What should NOT be in the video",
                    value="poor quality, distortion, blurry, low resolution, grainy",
                    lines=2,
                )
                with gr.Row():
                    width = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Width",
                    )
                    height = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Height",
                    )
                with gr.Row():
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=64,
                        value=16,
                        step=1,
                        label="Number of Frames",
                    )
                    fps = gr.Slider(
                        minimum=1,
                        maximum=30,
                        value=8,
                        step=1,
                        label="Frames Per Second",
                    )
            with gr.Column():
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=25,
                        step=1,
                        label="Inference Steps",
                    )
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5,
                        label="Guidance Scale",
                    )
                with gr.Row():
                    motion_bucket_id = gr.Slider(
                        minimum=1,
                        maximum=255,
                        value=127,
                        step=1,
                        label="Motion Bucket ID (for SVD models)",
                    )
                    seed = gr.Slider(
                        minimum=-1,
                        maximum=2147483647,
                        value=-1,
                        step=1,
                        label="Seed (-1 for random)",
                    )

    # Provider selection
    providers_list = [
        "hf-inference",  # Default Hugging Face Inference
        "fal-ai",        # Fal AI provider
        "novita",        # Novita provider
        "replicate",     # Replicate provider
    ]
    provider_radio = gr.Radio(
        choices=providers_list,
        value="hf-inference",
        label="Inference Provider",
        info="Select an inference provider. Providers other than hf-inference require your own API key.",
    )

    # BYOK textbox
    byok_textbox = gr.Textbox(
        value="",
        label="BYOK (Bring Your Own Key)",
        info="Enter a provider API key here. When empty, only the 'hf-inference' provider can be used.",
        placeholder="Enter your provider API token",
        type="password",  # Hide the API key
    )

    # Set up the generation click event
    generate_button.click(
        fn=generate_video,
        inputs=[
            prompt_box,
            negative_prompt,
            num_frames,
            fps,
            width,
            height,
            num_inference_steps,
            guidance_scale,
            motion_bucket_id,
            seed,
            provider_radio,
            byok_textbox,
            custom_model_box,
            model_search_box,
            featured_model_radio,
        ],
        outputs=video_output,
    )

    # Connect the model filter to update the radio choices
    def filter_models(search_term):
        print(f"Filtering models with search term: {search_term}")
        filtered = [m for m in models_list if search_term.lower() in m.lower()]
        print(f"Filtered models: {filtered}")
        return gr.update(choices=filtered)
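
    # Illustrative: filter_models("stable") narrows the choices to the two
    # stabilityai/stable-video-diffusion checkpoints in models_list above.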

    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio,
    )

    # Connect the featured model radio to update the custom model box
    def set_custom_model_from_radio(selected):
        """
        Triggered whenever a model is picked from the 'Featured Models' radio;
        copies that selection into the Custom Model textbox.
        """
        print(f"Featured model selected: {selected}")
        return selected

    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box,
    )

    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio,
    )

    # Also validate the provider when the radio changes, to keep the two consistent
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio,
    )

    # Information tab
    with gr.Accordion("Information & Help", open=False):
        gr.Markdown("""
        # 🎬 Serverless-VideoGen-Hub

        This application uses Hugging Face's Serverless Inference API to generate videos from text prompts.

        ## Supported Providers
        - **hf-inference**: Hugging Face's default inference API (free)
        - **fal-ai**: Fal AI provider (requires API key)
        - **novita**: Novita AI provider (requires API key)
        - **replicate**: Replicate provider (requires API key)

        ## Parameters Explained
        - **Prompt**: The text description of your desired video
        - **Negative Prompt**: What you DON'T want to see in the video
        - **Width/Height**: Dimensions of the generated video
        - **Number of Frames**: Total frames to generate
        - **FPS**: Frames per second for playback
        - **Inference Steps**: More steps mean higher quality but slower generation
        - **Guidance Scale**: How closely to follow the prompt (higher values are more faithful)
        - **Motion Bucket ID**: Controls motion intensity (for Stable Video Diffusion models)
        - **Seed**: For reproducible results; -1 means random

        ## Models
        You can either select from the featured models or enter a custom model path.
        Check out [Hugging Face's models page](https://huggingface.co/models?pipeline_tag=text-to-video) for more video generation models.
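
        ## Programmatic Use
        For use outside this UI, here is a minimal sketch with `huggingface_hub`
        (the model id and token are illustrative):

        ```python
        from huggingface_hub import InferenceClient

        client = InferenceClient(provider="hf-inference", token="hf_...")
        video_bytes = client.text_to_video(
            "A beautiful sunset over a calm ocean",
            model="damo-vilab/text-to-video-ms-1.7b",
        )
        with open("video.mp4", "wb") as f:
            f.write(video_bytes)
        ```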
""") | |

# Launch the app
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=True)