Nymbo committed
Commit 26ed6a6 · verified · 1 parent: c085c80

Create app.py

Files changed (1): app.py (+466, -0)
app.py ADDED
import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import tempfile
import uuid

# Access token from environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "No HF_TOKEN found; only BYOK requests will authenticate.")

def generate_video(
    prompt,
    negative_prompt,
    num_frames,
    fps,
    width,
    height,
    num_inference_steps,
    guidance_scale,
    motion_bucket_id,
    seed,
    provider,
    custom_api_key,
    custom_model,
    model_search_term,
    selected_model
):
    """Generate a video based on the provided parameters."""
    print(f"Received prompt: {prompt}")
    print(f"Negative prompt: {negative_prompt}")
    print(f"Num frames: {num_frames}, FPS: {fps}")
    print(f"Width: {width}, Height: {height}")
    print(f"Steps: {num_inference_steps}, Guidance Scale: {guidance_scale}")
    print(f"Motion Bucket ID: {motion_bucket_id}, Seed: {seed}")
    print(f"Selected provider: {provider}")
    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
    print(f"Selected model (custom_model): {custom_model}")
    print(f"Model search term: {model_search_term}")
    print(f"Selected model from radio: {selected_model}")

    # Determine which token to use: the custom API key if provided, otherwise ACCESS_TOKEN
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN

    # Log which token source we're using (without printing the actual token)
    if custom_api_key.strip() != "":
        print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
    else:
        print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

    # Initialize the Inference Client with the provider and appropriate token
    client = InferenceClient(token=token_to_use, provider=provider)
    print(f"Hugging Face Inference Client initialized with {provider} provider.")
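
    # Note: routing requests to third-party providers via
    # InferenceClient(provider=...) requires a reasonably recent
    # huggingface_hub release; older versions only support hf-inference.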

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None
    else:
        # Ensure seed is an integer
        seed = int(seed)

    # Determine which model to use, prioritizing custom_model if provided
    model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
    print(f"Model selected for inference: {model_to_use}")

    # Create a unique ID for this generation (used only for logging)
    generation_id = uuid.uuid4().hex[:8]
    print(f"Generation ID: {generation_id}")

    # Prepare parameters for the video generation request
    # Note: different providers may have different parameter requirements
    parameters = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "num_frames": num_frames,
        "fps": fps,
        "width": width,
        "height": height,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
    }

    # Add motion_bucket_id if applicable (depends on the model)
    if motion_bucket_id is not None:
        parameters["motion_bucket_id"] = motion_bucket_id

    # Add seed if specified
    if seed is not None:
        parameters["seed"] = seed
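
    # With the UI defaults, this payload looks roughly like:
    #   {"prompt": "A beautiful sunset over a calm ocean",
    #    "negative_prompt": "poor quality, ...", "num_frames": 16, "fps": 8,
    #    "width": 512, "height": 512, "num_inference_steps": 25,
    #    "guidance_scale": 7.5, "motion_bucket_id": 127}
    # (which fields a given model/provider actually accepts varies)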

    # For the FalAI provider - may need specific formatting
    if provider == "fal-ai":
        print("Using FalAI provider, adapting parameters...")
        # FalAI might use different parameter formats or additional settings;
        # note that fps and motion_bucket_id are not included in this payload
        parameters = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "num_frames": num_frames,
            "seed": seed if seed is not None else -1,
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
        }

    # For the Novita provider - may need specific formatting
    if provider == "novita":
        print("Using Novita provider, adapting parameters...")
        # Based on the documentation, Novita uses the text_to_video method
        try:
            # Note: depending on the huggingface_hub version, fields such as
            # fps, width, and height may not be accepted as keyword arguments
            # here and may need to be passed via extra_body instead
            video_data = client.text_to_video(
                prompt=prompt,
                model=model_to_use,
                negative_prompt=negative_prompt,
                num_frames=num_frames,
                fps=fps,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                seed=seed
            )

            # Save the returned video bytes to a temporary file
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
            temp_file.write(video_data)
            video_path = temp_file.name
            temp_file.close()

            print(f"Video saved to temporary file: {video_path}")
            return video_path

        except Exception as e:
            print(f"Error during Novita video generation: {e}")
            return f"Error: {str(e)}"

    # For the Replicate provider - may need specific formatting
    if provider == "replicate":
        print("Using Replicate provider, adapting parameters...")
        try:
            # client.post() sends a raw JSON payload; Replicate-style APIs
            # expect the generation parameters wrapped in an "input" object.
            # (post() exists only in huggingface_hub versions that still
            # expose the raw endpoint; newer releases removed it.)
            response = client.post(
                model=model_to_use,
                json={
                    "input": {
                        "prompt": prompt,
                        "negative_prompt": negative_prompt,
                        "num_frames": num_frames,
                        "fps": fps,
                        "width": width,
                        "height": height,
                        "num_inference_steps": num_inference_steps,
                        "guidance_scale": guidance_scale,
                        "seed": seed if seed is not None else 0,
                    }
                },
            )

            # The raw response is bytes; decode it before inspecting
            result = json.loads(response)

            # Replicate typically returns a URL to the generated video
            if isinstance(result, dict) and "output" in result:
                video_url = result["output"]
                print(f"Video generated, URL: {video_url}")
                return video_url
            else:
                return str(result)

        except Exception as e:
            print(f"Error during Replicate video generation: {e}")
            return f"Error: {str(e)}"

    # General approach for other providers (hf-inference and fal-ai reach here)
    try:
        print(f"Sending request to {provider} provider with model {model_to_use}.")
        print(f"Parameters: {parameters}")

        # "prompt" is passed explicitly to text_to_video below, so drop it
        # from the dict to avoid a duplicate keyword argument
        parameters.pop("prompt", None)

        # Use the text_to_video method of the InferenceClient
        video_data = client.text_to_video(
            prompt=prompt,
            model=model_to_use,
            **parameters
        )

        # Save the video to a temporary file
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        temp_file.write(video_data)
        video_path = temp_file.name
        temp_file.close()

        print(f"Video saved to temporary file: {video_path}")
        return video_path

    except Exception as e:
        print(f"Error during video generation: {e}")
        return f"Error: {str(e)}"

# Function to validate provider selection based on BYOK
def validate_provider(api_key, provider):
    # If no custom API key is provided, only "hf-inference" can be used
    if not api_key.strip() and provider != "hf-inference":
        return gr.update(value="hf-inference")
    return gr.update(value=provider)
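
# Example behavior (a sketch of the intended update semantics):
#   validate_provider("", "fal-ai")        -> radio resets to "hf-inference"
#   validate_provider("my-key", "fal-ai")  -> "fal-ai" stays selected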

# Define the Gradio UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    # Set a title for the application
    gr.Markdown("# 🎬 Serverless-VideoGen-Hub")
    gr.Markdown("Generate videos using Hugging Face Serverless Inference")

    with gr.Row():
        with gr.Column(scale=2):
            # Main video output area
            video_output = gr.Video(label="Generated Video", height=400)

            # Basic input components
            prompt_box = gr.Textbox(
                value="A beautiful sunset over a calm ocean",
                placeholder="Enter a prompt for your video",
                label="Prompt",
                lines=3
            )

            # Generate button
            generate_button = gr.Button("🎬 Generate Video", variant="primary")

        with gr.Column(scale=1):
            # Model selection components
            model_search_box = gr.Textbox(
                label="Filter Models",
                placeholder="Search for a model...",
                lines=1
            )

            models_list = [
                "stabilityai/stable-video-diffusion-img2vid-xt",
                "stabilityai/stable-video-diffusion-img2vid",
                "damo-vilab/text-to-video-ms-1.7b",
                "tencent/HunyuanVideo",
                "Wan-AI/Wan2.1-T2V-14B",
                "PixArt-alpha/PixArt-sigma-vid",
                "strangerbytesxyz/motion-animator-diffusion-video"
            ]

            featured_model_radio = gr.Radio(
                label="Select a model below",
                choices=models_list,
                value="stabilityai/stable-video-diffusion-img2vid",
                interactive=True
            )

            custom_model_box = gr.Textbox(
                value="",
                label="Custom Model",
                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
                placeholder="damo-vilab/text-to-video-ms-1.7b"
            )

    # Advanced settings in an accordion
    with gr.Accordion("Advanced Settings", open=False):
        with gr.Row():
            with gr.Column():
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="What should NOT be in the video",
                    value="poor quality, distortion, blurry, low resolution, grainy",
                    lines=2
                )

                with gr.Row():
                    width = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Width"
                    )

                    height = gr.Slider(
                        minimum=256,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Height"
                    )

                with gr.Row():
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=64,
                        value=16,
                        step=1,
                        label="Number of Frames"
                    )

                    fps = gr.Slider(
                        minimum=1,
                        maximum=30,
                        value=8,
                        step=1,
                        label="Frames Per Second"
                    )

            with gr.Column():
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=25,
                        step=1,
                        label="Inference Steps"
                    )

                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5,
                        label="Guidance Scale"
                    )

                with gr.Row():
                    motion_bucket_id = gr.Slider(
                        minimum=1,
                        maximum=255,
                        value=127,
                        step=1,
                        label="Motion Bucket ID (for SVD models)"
                    )

                    seed = gr.Slider(
                        minimum=-1,
                        maximum=2147483647,
                        value=-1,
                        step=1,
                        label="Seed (-1 for random)"
                    )

        # Provider selection
        providers_list = [
            "hf-inference",  # Default Hugging Face Inference
            "fal-ai",        # Fal AI provider
            "novita",        # Novita provider
            "replicate",     # Replicate provider
        ]

        provider_radio = gr.Radio(
            choices=providers_list,
            value="hf-inference",
            label="Inference Provider",
            info="Select an inference provider. All providers except hf-inference require your own API key."
        )

        # BYOK textbox
        byok_textbox = gr.Textbox(
            value="",
            label="BYOK (Bring Your Own Key)",
            info="Enter a provider API key here. When empty, only the 'hf-inference' provider can be used.",
            placeholder="Enter your provider API token",
            type="password"  # Hide the API key for security
        )

    # Set up the generation click event
    generate_button.click(
        fn=generate_video,
        inputs=[
            prompt_box,
            negative_prompt,
            num_frames,
            fps,
            width,
            height,
            num_inference_steps,
            guidance_scale,
            motion_bucket_id,
            seed,
            provider_radio,
            byok_textbox,
            custom_model_box,
            model_search_box,
            featured_model_radio
        ],
        outputs=video_output
    )
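
    # Note: the order of the inputs list above must match the parameter order
    # of generate_video() exactly; Gradio passes the values positionally.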

    # Connect the model filter to update the radio choices
    def filter_models(search_term):
        print(f"Filtering models with search term: {search_term}")
        filtered = [m for m in models_list if search_term.lower() in m.lower()]
        print(f"Filtered models: {filtered}")
        return gr.update(choices=filtered)

    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio
    )
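
    # Example: typing "stable" narrows the radio to the two
    # stabilityai/stable-video-diffusion entries; clearing the box restores all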

    # Connect the featured model radio to update the custom model box
    def set_custom_model_from_radio(selected):
        """
        Triggered whenever a model is picked from the 'Featured Models' radio;
        mirrors the selection into the Custom Model textbox.
        """
        print(f"Featured model selected: {selected}")
        return selected

    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
        outputs=custom_model_box
    )

    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

    # Also validate provider when the radio changes, to ensure consistency
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )

    # Information accordion
    with gr.Accordion("Information & Help", open=False):
        gr.Markdown("""
        # 🎬 Serverless-VideoGen-Hub

        This application uses Hugging Face's Serverless Inference API to generate videos from text prompts.

        ## Supported Providers

        - **hf-inference**: Hugging Face's default inference API (free)
        - **fal-ai**: Fal AI provider (requires API key)
        - **novita**: Novita AI provider (requires API key)
        - **replicate**: Replicate provider (requires API key)

        ## Parameters Explained

        - **Prompt**: The text description of your desired video
        - **Negative Prompt**: What you DON'T want to see in the video
        - **Width/Height**: Dimensions of the generated video
        - **Number of Frames**: Total frames to generate
        - **FPS**: Frames per second for playback
        - **Inference Steps**: More steps = higher quality but slower generation
        - **Guidance Scale**: How closely to follow the prompt (higher values = more faithful)
        - **Motion Bucket ID**: Controls motion intensity (for Stable Video Diffusion models)
        - **Seed**: For reproducible results; -1 means random

        ## Models

        You can either select from the featured models or enter a custom model path.

        Check out [Hugging Face's models page](https://huggingface.co/models?pipeline_tag=text-to-video) for more video generation models.
        """)

# Launch the app
if __name__ == "__main__":
    print("Launching the demo application.")
    demo.launch(show_api=True)
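
# Because demo.launch(show_api=True) exposes the Gradio API, the app can also
# be driven programmatically. A minimal sketch (assuming the Space is reachable
# at the hypothetical URL below, that gradio_client is installed, and that the
# click handler is exposed under the assumed endpoint name "/generate_video"):
#
#   from gradio_client import Client
#   client = Client("https://example-space-url")      # hypothetical URL
#   result = client.predict(
#       "A beautiful sunset over a calm ocean",       # prompt
#       "poor quality, blurry",                       # negative_prompt
#       16, 8, 512, 512, 25, 7.5, 127, -1,            # frames, fps, w, h, steps, cfg, motion, seed
#       "hf-inference", "",                           # provider, BYOK key
#       "", "",                                       # custom model, search term
#       "stabilityai/stable-video-diffusion-img2vid", # featured model
#       api_name="/generate_video"
#   )
#   print(result)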