import gradio as gr
from huggingface_hub import InferenceClient
import os
import tempfile
import uuid
# Access token from environment variable
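# On Hugging Face Spaces this is typically configured as a repository secret; if it is
# missing, users can still supply their own key via the BYOK field in the UI.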
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")
def generate_video(
prompt,
negative_prompt,
num_frames,
fps,
width,
height,
num_inference_steps,
guidance_scale,
motion_bucket_id,
seed,
provider,
custom_api_key,
custom_model,
model_search_term,
selected_model
):
"""Generate a video based on the provided parameters"""
print(f"Received prompt: {prompt}")
print(f"Negative prompt: {negative_prompt}")
print(f"Num frames: {num_frames}, FPS: {fps}")
print(f"Width: {width}, Height: {height}")
print(f"Steps: {num_inference_steps}, Guidance Scale: {guidance_scale}")
print(f"Motion Bucket ID: {motion_bucket_id}, Seed: {seed}")
print(f"Selected provider: {provider}")
print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
print(f"Selected model (custom_model): {custom_model}")
print(f"Model search term: {model_search_term}")
print(f"Selected model from radio: {selected_model}")
# Determine which token to use - custom API key if provided, otherwise the ACCESS_TOKEN
token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
# Log which token source we're using (without printing the actual token)
if custom_api_key.strip() != "":
print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
else:
print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
# Initialize the Inference Client with the provider and appropriate token
client = InferenceClient(token=token_to_use, provider=provider)
print(f"Hugging Face Inference Client initialized with {provider} provider.")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
else:
# Ensure seed is an integer
seed = int(seed)
# Determine which model to use, prioritizing custom_model if provided
model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
print(f"Model selected for inference: {model_to_use}")
# Create a unique ID for this generation
generation_id = uuid.uuid4().hex[:8]
print(f"Generation ID: {generation_id}")
# Prepare parameters for the video generation request
# Note: Different providers may have different parameter requirements
parameters = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"num_frames": num_frames,
"fps": fps,
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"guidance_scale": guidance_scale,
}
# Add motion_bucket_id if applicable (depends on the model)
if motion_bucket_id is not None:
parameters["motion_bucket_id"] = motion_bucket_id
# Add seed if specified
if seed is not None:
parameters["seed"] = seed
# For FalAI provider - may need specific formatting
if provider == "fal-ai":
print("Using FalAI provider, adapting parameters...")
# FalAI might use different parameter formats or additional settings
parameters = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"num_frames": num_frames,
"seed": seed if seed is not None else -1,
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"guidance_scale": guidance_scale,
}
# For Novita provider - may need specific formatting
if provider == "novita":
print("Using Novita provider, adapting parameters...")
# Based on documentation, Novita uses text_to_video method
try:
# For Novita, we use a different method from the InferenceClient
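# Note: depending on the installed huggingface_hub version, fps/width/height may not be
# accepted as direct keyword arguments by text_to_video and may need to be passed as
# provider-specific extra parameters instead.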
video_data = client.text_to_video(
prompt=prompt,
model=model_to_use,
negative_prompt=negative_prompt,
num_frames=num_frames,
fps=fps,
width=width,
height=height,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
seed=seed
)
# Save the video to a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
temp_file.write(video_data)
video_path = temp_file.name
temp_file.close()
print(f"Video saved to temporary file: {video_path}")
return video_path
except Exception as e:
print(f"Error during Novita video generation: {e}")
return f"Error: {str(e)}"
# For Replicate provider - may need specific formatting
if provider == "replicate":
print("Using Replicate provider, adapting parameters...")
# Note: the generic InferenceClient.post() helper does not accept an `input=` keyword
# and is deprecated in recent huggingface_hub releases, so this branch also goes through
# text_to_video; the Replicate provider is routed through the same task method.
# fps/width/height are omitted because text_to_video does not document them as direct arguments.
try:
video_data = client.text_to_video(
prompt=prompt,
model=model_to_use,
negative_prompt=negative_prompt,
num_frames=num_frames,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
seed=seed
)
# Save the video to a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
temp_file.write(video_data)
video_path = temp_file.name
temp_file.close()
print(f"Video saved to temporary file: {video_path}")
return video_path
except Exception as e:
print(f"Error during Replicate video generation: {e}")
return f"Error: {str(e)}"
# General approach for other providers
try:
print(f"Sending request to {provider} provider with model {model_to_use}.")
print(f"Parameters: {parameters}")
# Use the text_to_video method of the InferenceClient.
# `parameters` already contains the prompt, so it is unpacked directly here to avoid
# passing the same keyword twice. Depending on the installed huggingface_hub version,
# some keys (fps, width, height, motion_bucket_id) may need to be sent as
# provider-specific extra parameters instead.
video_data = client.text_to_video(
model=model_to_use,
**parameters
)
# Save the video to a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
temp_file.write(video_data)
video_path = temp_file.name
temp_file.close()
print(f"Video saved to temporary file: {video_path}")
return video_path
except Exception as e:
print(f"Error during video generation: {e}")
return f"Error: {str(e)}"
# Function to validate provider selection based on BYOK
def validate_provider(api_key, provider):
# If no custom API key is provided, only "hf-inference" can be used
if not api_key.strip() and provider != "hf-inference":
return gr.update(value="hf-inference")
return gr.update(value=provider)
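# Example: validate_provider("", "fal-ai") resets the selection to "hf-inference",
# while a non-empty key leaves the chosen provider untouched.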
# Define the GRADIO UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
# Set a title for the application
gr.Markdown("# 🎬 Serverless-VideoGen-Hub")
gr.Markdown("Generate videos using Hugging Face Serverless Inference")
with gr.Row():
with gr.Column(scale=2):
# Main video output area
video_output = gr.Video(label="Generated Video", height=400)
# Basic input components
prompt_box = gr.Textbox(
value="A beautiful sunset over a calm ocean",
placeholder="Enter a prompt for your video",
label="Prompt",
lines=3
)
# Generate button
generate_button = gr.Button("🎬 Generate Video", variant="primary")
with gr.Column(scale=1):
# Model selection components
model_search_box = gr.Textbox(
label="Filter Models",
placeholder="Search for a model...",
lines=1
)
models_list = [
"stabilityai/stable-video-diffusion-img2vid-xt",
"stabilityai/stable-video-diffusion-img2vid",
"damo-vilab/text-to-video-ms-1.7b",
"tencent/HunyuanVideo",
"Wan-AI/Wan2.1-T2V-14B",
"PixArt-alpha/PixArt-sigma-vid",
"strangerbytesxyz/motion-animator-diffusion-video"
]
featured_model_radio = gr.Radio(
label="Select a model below",
choices=models_list,
value="stabilityai/stable-video-diffusion-img2vid",
interactive=True
)
custom_model_box = gr.Textbox(
value="",
label="Custom Model",
info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
placeholder="damo-vilab/text-to-video-ms-1.7b"
)
# Advanced settings in an accordion
with gr.Accordion("Advanced Settings", open=False):
with gr.Row():
with gr.Column():
negative_prompt = gr.Textbox(
label="Negative Prompt",
placeholder="What should NOT be in the video",
value="poor quality, distortion, blurry, low resolution, grainy",
lines=2
)
with gr.Row():
width = gr.Slider(
minimum=256,
maximum=1024,
value=512,
step=64,
label="Width"
)
height = gr.Slider(
minimum=256,
maximum=1024,
value=512,
step=64,
label="Height"
)
with gr.Row():
num_frames = gr.Slider(
minimum=8,
maximum=64,
value=16,
step=1,
label="Number of Frames"
)
fps = gr.Slider(
minimum=1,
maximum=30,
value=8,
step=1,
label="Frames Per Second"
)
with gr.Column():
with gr.Row():
num_inference_steps = gr.Slider(
minimum=1,
maximum=100,
value=25,
step=1,
label="Inference Steps"
)
guidance_scale = gr.Slider(
minimum=1.0,
maximum=20.0,
value=7.5,
step=0.5,
label="Guidance Scale"
)
with gr.Row():
motion_bucket_id = gr.Slider(
minimum=1,
maximum=255,
value=127,
step=1,
label="Motion Bucket ID (for SVD models)"
)
seed = gr.Slider(
minimum=-1,
maximum=2147483647,
value=-1,
step=1,
label="Seed (-1 for random)"
)
# Provider selection
providers_list = [
"hf-inference", # Default Hugging Face Inference
"fal-ai", # Fal AI provider
"novita", # Novita provider
"replicate", # Replicate provider
]
provider_radio = gr.Radio(
choices=providers_list,
value="hf-inference",
label="Inference Provider",
info="Select an inference provider. Note: Requires provider-specific API key except for hf-inference"
)
# BYOK textbox
byok_textbox = gr.Textbox(
value="",
label="BYOK (Bring Your Own Key)",
info="Enter a provider API key here. When empty, only 'hf-inference' provider can be used.",
placeholder="Enter your provider API token",
type="password" # Hide the API key for security
)
# Set up the generation click event
generate_button.click(
fn=generate_video,
inputs=[
prompt_box,
negative_prompt,
num_frames,
fps,
width,
height,
num_inference_steps,
guidance_scale,
motion_bucket_id,
seed,
provider_radio,
byok_textbox,
custom_model_box,
model_search_box,
featured_model_radio
],
outputs=video_output
)
# Connect the model filter to update the radio choices
def filter_models(search_term):
print(f"Filtering models with search term: {search_term}")
filtered = [m for m in models_list if search_term.lower() in m.lower()]
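# An empty search term matches every model, so clearing the box restores the full list.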
print(f"Filtered models: {filtered}")
return gr.update(choices=filtered)
model_search_box.change(
fn=filter_models,
inputs=model_search_box,
outputs=featured_model_radio
)
# Connect the featured model radio to update the custom model box
def set_custom_model_from_radio(selected):
"""
This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
We will update the Custom Model text box with that selection automatically.
"""
print(f"Featured model selected: {selected}")
return selected
featured_model_radio.change(
fn=set_custom_model_from_radio,
inputs=featured_model_radio,
outputs=custom_model_box
)
# Connect the BYOK textbox to validate provider selection
byok_textbox.change(
fn=validate_provider,
inputs=[byok_textbox, provider_radio],
outputs=provider_radio
)
# Also validate provider when the radio changes to ensure consistency
provider_radio.change(
fn=validate_provider,
inputs=[byok_textbox, provider_radio],
outputs=provider_radio
)
# Information tab
with gr.Accordion("Information & Help", open=False):
gr.Markdown("""
# 🎬 Serverless-VideoGen-Hub
This application uses Hugging Face's Serverless Inference API to generate videos from text prompts.
## Supported Providers
- **hf-inference**: Hugging Face's default inference API (free)
- **fal-ai**: Fal AI provider (requires API key)
- **novita**: Novita AI provider (requires API key)
- **replicate**: Replicate provider (requires API key)
## Parameters Explained
- **Prompt**: The text description of your desired video
- **Negative Prompt**: What you DON'T want to see in the video
- **Width/Height**: Dimensions of the generated video
- **Number of Frames**: Total frames to generate
- **FPS**: Frames per second for playback
- **Inference Steps**: More steps = higher quality but slower generation
- **Guidance Scale**: How closely to follow the prompt (higher values = more faithful)
- **Motion Bucket ID**: Controls motion intensity (for Stable Video Diffusion models)
- **Seed**: For reproducible results, -1 means random
## Models
You can either select from the featured models or enter a custom model path.
Check out [Hugging Face's models page](https://huggingface.co/models?pipeline_tag=text-to-video) for more video generation models.
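## Programmatic Use
The app is a thin wrapper around `InferenceClient.text_to_video` from `huggingface_hub`. Below is a minimal sketch of the same call outside Gradio; the model name and token are placeholders, and defaults mirror the sliders above.
```python
from huggingface_hub import InferenceClient
# Placeholder token and model - substitute your own values
client = InferenceClient(token="hf_xxx", provider="hf-inference")
# Request a short clip and save the returned MP4 bytes to disk
video_bytes = client.text_to_video(
    "A beautiful sunset over a calm ocean",
    model="damo-vilab/text-to-video-ms-1.7b",
    num_frames=16,
    num_inference_steps=25,
    guidance_scale=7.5,
)
with open("video.mp4", "wb") as f:
    f.write(video_bytes)
```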
""")
# Launch the app
if __name__ == "__main__":
print("Launching the demo application.")
demo.launch(show_api=True)