Video-to-SoundFX

Running

App Files Community

fffiloni committed on Jun 14

Commit

5d9f0c4

•

1 Parent(s): fd67dba

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -97

app.py CHANGED Viewed

@@ -94,95 +94,106 @@ def get_caption(image_in):
 def get_magnet(prompt):
     amended_prompt = f"{prompt}"
     print(amended_prompt)
-    client = Client("https://fffiloni-magnet.hf.space/")
-    result = client.predict(
-        "facebook/audio-magnet-medium",	# Literal['facebook/magnet-small-10secs', 'facebook/magnet-medium-10secs', 'facebook/magnet-small-30secs', 'facebook/magnet-medium-30secs', 'facebook/audio-magnet-small', 'facebook/audio-magnet-medium']  in 'Model' Radio component
-        "",	# str  in 'Model Path (custom models)' Textbox component
-        amended_prompt,	# str  in 'Input Text' Textbox component
-        3,	# float  in 'Temperature' Number component
-        0.9,	# float  in 'Top-p' Number component
-        10,	# float  in 'Max CFG coefficient' Number component
-        1,	# float  in 'Min CFG coefficient' Number component
-        20,	# float  in 'Decoding Steps (stage 1)' Number component
-        10,	# float  in 'Decoding Steps (stage 2)' Number component
-        10,	# float  in 'Decoding Steps (stage 3)' Number component
-        10,	# float  in 'Decoding Steps (stage 4)' Number component
-        "prod-stride1 (new!)",	# Literal['max-nonoverlap', 'prod-stride1 (new!)']  in 'Span Scoring' Radio component
-        api_name="/predict_full"
-    )
-    print(result)
-    return result[1]
 def get_audioldm(prompt):
-    client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
-    result = client.predict(
-        prompt,	# str in 'Input text' Textbox component
-        "Low quality. Music.",	# str in 'Negative prompt' Textbox component
-        10,	# int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
-        3.5,	# int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
-        45,	# int | float in 'Seed' Number component
-        3,	# int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
-        fn_index=1
-    )
-    print(result)
-    audio_result = extract_audio(result)
-    return audio_result
 def get_audiogen(prompt):
-    client = Client("https://fffiloni-audiogen.hf.space/")
-    result = client.predict(
-        prompt,
-        10,
-        api_name="/infer"
-    )
-    return result
 def get_tango(prompt):
     try:
-        #client = Client("https://declare-lab-tango.hf.space/")
-        client = Client("https://fffiloni-tango.hf.space/", hf_token=hf_token)
-    except:
-        raise gr.Error("Tango space API is not ready, please try again in few minutes ")
-    result = client.predict(
 				prompt,	# str representing string value in 'Prompt' Textbox component
 				100,	# int | float representing numeric value between 100 and 200 in 'Steps' Slider component
 				4,	# int | float representing numeric value between 1 and 10 in 'Guidance Scale' Slider component
 				api_name="/predict"
-    )
-    print(result)
-    return result
 def get_tango2(prompt):
     try:
         client = Client("declare-lab/tango2")
-    except:
-        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
-    result = client.predict(
     		prompt,
     		100,
     		4,
     		api_name="/predict"
-    )
-    print(result)
-    return result
 def get_stable_audio_open(prompt):
     try:
         client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
     except:
         raise gr.Error("Stable Audio Open space API is not ready, please try again in few minutes ")
-    result = client.predict(
-		prompt=prompt,
-		seconds_total=30,
-		steps=100,
-		cfg_scale=7,
-		api_name="/predict"
-    )
-    print(result)
-    return result
 def blend_vsfx(video_in, audio_result):
     audioClip = AudioFileClip(audio_result)
@@ -203,46 +214,44 @@ def blend_vsfx(video_in, audio_result):
 def infer(video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = get_caption(image_in)
-    try:
-        if chosen_model == "MAGNet" :
-            audio_result = get_magnet(caption)
-        elif chosen_model == "AudioLDM-2" :
-            audio_result = get_audioldm(caption)
-        elif chosen_model == "AudioGen" :
-            audio_result = get_audiogen(caption)
-        elif chosen_model == "Tango" :
-            audio_result = get_tango(caption)
-        elif chosen_model == "Tango 2" :
-            audio_result = get_tango2(caption)
-        elif chosen_model == "Stable Audio Open" :
-            audio_result = get_stable_audio_open(caption)
-        final_res = blend_vsfx(video_in, audio_result)
-        return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
-    except:
-        raise gr.Error(f"an error occured with {chosen_model}")
 def retry(edited_prompt, video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = edited_prompt
-    try:
-        if chosen_model == "MAGNet" :
-            audio_result = get_magnet(caption)
-        elif chosen_model == "AudioLDM-2" :
-            audio_result = get_audioldm(caption)
-        elif chosen_model == "AudioGen" :
-            audio_result = get_audiogen(caption)
-        elif chosen_model == "Tango" :
-            audio_result = get_tango(caption)
-        elif chosen_model == "Tango 2" :
-            audio_result = get_tango2(caption)
-        elif chosen_model == "Stable Audio Open" :
-            audio_result = get_stable_audio_open(caption)
-        final_res = blend_vsfx(video_in, audio_result)
-        return audio_result, final_res
-    except:
-        raise gr.Error(f"an error occured with {chosen_model}")
 def refresh():

 def get_magnet(prompt):
     amended_prompt = f"{prompt}"
     print(amended_prompt)
+    try:
+        client = Client("https://fffiloni-magnet.hf.space/")
+        result = client.predict(
+            "facebook/audio-magnet-medium",	# Literal['facebook/magnet-small-10secs', 'facebook/magnet-medium-10secs', 'facebook/magnet-small-30secs', 'facebook/magnet-medium-30secs', 'facebook/audio-magnet-small', 'facebook/audio-magnet-medium']  in 'Model' Radio component
+            "",	# str  in 'Model Path (custom models)' Textbox component
+            amended_prompt,	# str  in 'Input Text' Textbox component
+            3,	# float  in 'Temperature' Number component
+            0.9,	# float  in 'Top-p' Number component
+            10,	# float  in 'Max CFG coefficient' Number component
+            1,	# float  in 'Min CFG coefficient' Number component
+            20,	# float  in 'Decoding Steps (stage 1)' Number component
+            10,	# float  in 'Decoding Steps (stage 2)' Number component
+            10,	# float  in 'Decoding Steps (stage 3)' Number component
+            10,	# float  in 'Decoding Steps (stage 4)' Number component
+            "prod-stride1 (new!)",	# Literal['max-nonoverlap', 'prod-stride1 (new!)']  in 'Span Scoring' Radio component
+            api_name="/predict_full"
+        )
+        print(result)
+        return result[1]
+    except:
+        raise gr.Error("MAGNet space API is not ready, please try again in few minutes ")
 def get_audioldm(prompt):
+    try:
+        client = Client("https://haoheliu-audioldm2-text2audio-text2music.hf.space/")
+        result = client.predict(
+            prompt,	# str in 'Input text' Textbox component
+            "Low quality. Music.",	# str in 'Negative prompt' Textbox component
+            10,	# int | float (numeric value between 5 and 15) in 'Duration (seconds)' Slider component
+            3.5,	# int | float (numeric value between 0 and 7) in 'Guidance scale' Slider component
+            45,	# int | float in 'Seed' Number component
+            3,	# int | float (numeric value between 1 and 5) in 'Number waveforms to generate' Slider component
+            fn_index=1
+        )
+        print(result)
+        audio_result = extract_audio(result)
+        return audio_result
+    except:
+        raise gr.Error("AudioLDM space API is not ready, please try again in few minutes ")
 def get_audiogen(prompt):
+    try:
+        client = Client("https://fffiloni-audiogen.hf.space/")
+        result = client.predict(
+            prompt,
+            10,
+            api_name="/infer"
+        )
+        return result
+    except:
+        raise gr.Error("AudioGen space API is not ready, please try again in few minutes ")
 def get_tango(prompt):
     try:
+        client = Client("fffiloni/tango", hf_token=hf_token)
+        result = client.predict(
 				prompt,	# str representing string value in 'Prompt' Textbox component
 				100,	# int | float representing numeric value between 100 and 200 in 'Steps' Slider component
 				4,	# int | float representing numeric value between 1 and 10 in 'Guidance Scale' Slider component
 				api_name="/predict"
+        )
+        print(result)
+        return result
+    except:
+        raise gr.Error("Tango space API is not ready, please try again in few minutes ")
 def get_tango2(prompt):
     try:
         client = Client("declare-lab/tango2")
+        result = client.predict(
     		prompt,
     		100,
     		4,
     		api_name="/predict"
+        )
+        print(result)
+        return result
+    except:
+        raise gr.Error("Tango2 space API is not ready, please try again in few minutes ")
 def get_stable_audio_open(prompt):
     try:
         client = Client("fffiloni/Stable-Audio-Open-A10", hf_token=hf_token)
+        result = client.predict(
+    		prompt=prompt,
+    		seconds_total=30,
+    		steps=100,
+    		cfg_scale=7,
+    		api_name="/predict"
+        )
+        print(result)
+        return result
     except:
         raise gr.Error("Stable Audio Open space API is not ready, please try again in few minutes ")
 def blend_vsfx(video_in, audio_result):
     audioClip = AudioFileClip(audio_result)
 def infer(video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = get_caption(image_in)
+    if chosen_model == "MAGNet" :
+        audio_result = get_magnet(caption)
+    elif chosen_model == "AudioLDM-2" :
+        audio_result = get_audioldm(caption)
+    elif chosen_model == "AudioGen" :
+        audio_result = get_audiogen(caption)
+    elif chosen_model == "Tango" :
+        audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
+    final_res = blend_vsfx(video_in, audio_result)
+    return gr.update(value=caption, interactive=True), gr.update(interactive=True), audio_result, final_res
 def retry(edited_prompt, video_in, chosen_model):
     image_in = extract_firstframe(video_in)
     caption = edited_prompt
+    if chosen_model == "MAGNet" :
+        audio_result = get_magnet(caption)
+    elif chosen_model == "AudioLDM-2" :
+        audio_result = get_audioldm(caption)
+    elif chosen_model == "AudioGen" :
+        audio_result = get_audiogen(caption)
+    elif chosen_model == "Tango" :
+        audio_result = get_tango(caption)
+    elif chosen_model == "Tango 2" :
+        audio_result = get_tango2(caption)
+    elif chosen_model == "Stable Audio Open" :
+        audio_result = get_stable_audio_open(caption)
+    final_res = blend_vsfx(video_in, audio_result)
+    return audio_result, final_res
 def refresh():