Phil Sobrepena committed on
Commit
39a23a5
·
1 Parent(s): 65f1027
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -60,14 +60,15 @@ net, feature_utils, seq_cfg = get_model()
60
 
61
  @spaces.GPU(duration=120)
62
  @torch.inference_mode()
63
- def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: 0, num_steps: 25,
64
  cfg_strength: 4.5, duration: 8.0):
65
 
66
  rng = torch.Generator(device=device)
67
- if seed >= 0:
68
- rng.manual_seed(seed)
69
- else:
70
- rng.seed()
 
71
  fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
72
 
73
  video_info = load_video(video, duration)
@@ -102,6 +103,7 @@ video_to_audio_tab = gr.Interface(
102
  fn=video_to_audio,
103
  description="""
104
  Video-to-Audio
 
105
  NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
106
  Doing so does not improve results.
107
 
@@ -118,7 +120,7 @@ video_to_audio_tab = gr.Interface(
118
  ],
119
  outputs='playable_video',
120
  cache_examples=False,
121
- title='Sonisphere — Sonic Branding Synthesis',
122
  examples=[
123
  ])
124
 
 
60
 
61
  @spaces.GPU(duration=120)
62
  @torch.inference_mode()
63
+ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, num_steps: 25,
64
  cfg_strength: 4.5, duration: 8.0):
65
 
66
  rng = torch.Generator(device=device)
67
+ # if seed >= 0:
68
+ # rng.manual_seed(seed)
69
+ # else:
70
+ rng.seed()
71
+
72
  fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
73
 
74
  video_info = load_video(video, duration)
 
103
  fn=video_to_audio,
104
  description="""
105
  Video-to-Audio
106
+
107
  NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
108
  Doing so does not improve results.
109
 
 
120
  ],
121
  outputs='playable_video',
122
  cache_examples=False,
123
+ title='Sonisphere — Sonic Branding with Multi-modal Audio Synthesis',
124
  examples=[
125
  ])
126