Phil Sobrepena committed · Commit 65f1027
Parent(s): eb4ead1

fix
app.py CHANGED
@@ -64,7 +64,7 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: 0,
                    cfg_strength: 4.5, duration: 8.0):
 
     rng = torch.Generator(device=device)
-    if seed >=
+    if seed >= 0:
         rng.manual_seed(seed)
     else:
         rng.seed()
@@ -98,42 +98,10 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: 0,
     return video_save_path
 
 
-@spaces.GPU(duration=120)
-@torch.inference_mode()
-def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
-                  duration: float):
-
-    rng = torch.Generator(device=device)
-    if seed >= 0:
-        rng.manual_seed(seed)
-    else:
-        rng.seed()
-    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
-
-    clip_frames = sync_frames = None
-    seq_cfg.duration = duration
-    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
-
-    audios = generate(clip_frames,
-                      sync_frames, [prompt],
-                      negative_text=[negative_prompt],
-                      feature_utils=feature_utils,
-                      net=net,
-                      fm=fm,
-                      rng=rng,
-                      cfg_strength=cfg_strength)
-    audio = audios.float().cpu()[0]
-
-    audio_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.flac').name
-    torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)
-    log.info(f'Saved audio to {audio_save_path}')
-    return audio_save_path
-
-
 video_to_audio_tab = gr.Interface(
     fn=video_to_audio,
     description="""
-
+    Video-to-Audio
     NOTE: It takes longer to process high-resolution videos (>384 px on the shorter side).
     Doing so does not improve results.
 
@@ -150,7 +118,7 @@ video_to_audio_tab = gr.Interface(
     ],
     outputs='playable_video',
     cache_examples=False,
-    title='Sonisphere —
+    title='Sonisphere — Sonic Branding Synthesis',
     examples=[
 ])
 
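For context, the seeding logic this commit repairs follows a common PyTorch pattern: a non-negative seed makes generation reproducible, while a negative seed falls back to a nondeterministic generator. Below is a minimal, self-contained sketch of that pattern only; `make_rng` and the `device` selection are illustrative placeholders and not part of this commit.

import torch

# Hypothetical stand-in for the Space's global device; the real app picks its device elsewhere.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def make_rng(seed: int) -> torch.Generator:
    """Return a generator that is deterministic for seed >= 0 and freshly seeded otherwise."""
    rng = torch.Generator(device=device)
    if seed >= 0:
        rng.manual_seed(seed)  # reproducible runs for a fixed seed
    else:
        rng.seed()             # nondeterministic seed drawn from system entropy
    return rng

# Usage: the same non-negative seed yields identical noise tensors.
a = torch.randn(4, generator=make_rng(42), device=device)
b = torch.randn(4, generator=make_rng(42), device=device)
assert torch.equal(a, b)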