Spaces:

espnet
/

SingingSDS

Running

App Files Files Community

jhansss committed 6 days ago

Commit

b03522b

1 Parent(s): 1df5822

Refactor run_pipeline and update_metrics methods to use a global pipeline instance and improve parameter handling

Browse files

Files changed (1) hide show

interface.py +89 -29

interface.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import time
 import uuid
-from functools import partial
 import gradio as gr
 import spaces
@@ -9,35 +8,85 @@ import yaml
 from characters import CHARACTERS
 from pipeline import SingingDialoguePipeline
 @spaces.GPU(duration=120)
-def run_pipeline(audio_path, interface):
     if not audio_path:
-        return gr.update(value=None), gr.update(value=None)
     tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
-    results = interface.pipeline.run(
         audio_path,
-        interface.svs_model_map[interface.current_svs_model]["lang"],
-        interface.character_info[interface.current_character].prompt,
-        interface.current_voice,
         output_audio_path=tmp_file,
     )
     formatted_logs = f"ASR: {results['asr_text']}\nLLM: {results['llm_text']}"
-    return gr.update(value=formatted_logs), gr.update(
-        value=results["output_audio_path"]
     )
 @spaces.GPU(duration=120)
-def update_metrics(audio_path, interface):
-    if not audio_path or not interface.results:
         return gr.update(value="")
-    results = interface.pipeline.evaluate(audio_path, **interface.results)
-    results.update(interface.results.get("metrics", {}))
     formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
     return gr.update(value=formatted_metrics)
 class GradioInterface:
     def __init__(self, options_config: str, default_config: str):
         self.options = self.load_config(options_config)
@@ -53,7 +102,6 @@ class GradioInterface:
         self.current_voice = self.svs_model_map[self.current_svs_model]["voices"][
             self.character_info[self.current_character].default_voice
         ]
-        self.pipeline = SingingDialoguePipeline(self.default_config)
         self.results = None
     def load_config(self, path: str):
@@ -177,18 +225,18 @@ class GradioInterface:
                     fn=self.update_voice, inputs=voice_radio, outputs=voice_radio
                 )
                 mic_input.change(
-                    fn=partial(run_pipeline, interface=self),
                     inputs=mic_input,
                     outputs=[interaction_log, audio_output],
                 )
                 metrics_button.click(
-                    fn=partial(update_metrics, interface=self),
                     inputs=audio_output,
                     outputs=[metrics_output],
                 )
             return demo
-        except Exception as e:
             import traceback
             print(traceback.format_exc())
@@ -205,12 +253,12 @@ class GradioInterface:
         )
     def update_asr_model(self, asr_model):
-        self.pipeline.set_asr_model(asr_model)
-        return gr.update(value=asr_model)
     def update_llm_model(self, llm_model):
-        self.pipeline.set_llm_model(llm_model)
-        return gr.update(value=llm_model)
     def update_svs_model(self, svs_model):
         self.current_svs_model = svs_model
@@ -218,12 +266,9 @@ class GradioInterface:
         self.current_voice = self.svs_model_map[self.current_svs_model]["voices"][
             character_voice
         ]
-        self.pipeline.set_svs_model(
-            self.svs_model_map[self.current_svs_model]["model_path"]
-        )
-        print(
-            f"SVS model updated to {self.current_svs_model}. Will set gradio svs_radio to {svs_model} and voice_radio to {character_voice}"
-        )
         return (
             gr.update(value=svs_model),
             gr.update(
@@ -236,9 +281,24 @@ class GradioInterface:
     def update_melody_source(self, melody_source):
         self.current_melody_source = melody_source
-        self.pipeline.set_melody_controller(melody_source)
-        return gr.update(value=self.current_melody_source)
     def update_voice(self, voice):
         self.current_voice = self.svs_model_map[self.current_svs_model]["voices"][voice]
         return gr.update(value=voice)

 import time
 import uuid
 import gradio as gr
 import spaces
 from characters import CHARACTERS
 from pipeline import SingingDialoguePipeline
# Module-level pipeline shared by every handler below; it stays None until
# the first call to _ensure_pipeline builds it.
pipe = None


def _ensure_pipeline(config):
    """Create the shared pipeline on first use; later calls are no-ops.

    Args:
        config: Passed to ``SingingDialoguePipeline`` when the pipeline is
            built. Ignored once ``pipe`` has already been set.
    """
    global pipe
    if pipe is not None:
        return
    # Construction is deferred to the first handler call (per the original
    # note: so that it happens in the GPU worker context).
    pipe = SingingDialoguePipeline(config)
 @spaces.GPU(duration=120)
def run_pipeline(audio_path, config, svs_model_info, character_prompt, current_voice):
    """Run the shared pipeline on one recording and build the UI updates.

    Args:
        audio_path: Path of the input recording; a falsy value short-circuits.
        config: Forwarded to ``_ensure_pipeline`` to build the pipeline if
            it does not exist yet.
        svs_model_info: Mapping for the selected SVS model; only its
            ``"lang"`` entry is read here.
        character_prompt: Prompt forwarded to ``pipe.run``.
        current_voice: Voice forwarded to ``pipe.run``.

    Returns:
        A 3-tuple ``(log_update, audio_update, results)``. Without input
        audio both updates carry ``None`` and ``results`` is ``None``;
        otherwise ``results`` is whatever ``pipe.run`` returned.
    """
    if not audio_path:
        return gr.update(value=None), gr.update(value=None), None

    _ensure_pipeline(config)

    # Timestamp plus a short random suffix keeps output file names distinct.
    stamp = int(time.time())
    token = uuid.uuid4().hex[:8]
    tmp_file = f"audio_{stamp}_{token}.wav"

    results = pipe.run(
        audio_path,
        svs_model_info["lang"],
        character_prompt,
        current_voice,
        output_audio_path=tmp_file,
    )

    asr_text = results["asr_text"]
    llm_text = results["llm_text"]
    formatted_logs = f"ASR: {asr_text}\nLLM: {llm_text}"

    log_update = gr.update(value=formatted_logs)
    audio_update = gr.update(value=results["output_audio_path"])
    return log_update, audio_update, results
 @spaces.GPU(duration=120)
def update_metrics(audio_path, config, results_data):
    """Evaluate a generated clip and format the metrics as display text.

    Args:
        audio_path: Path of the audio to evaluate.
        config: Forwarded to ``_ensure_pipeline``.
        results_data: Mapping from a previous pipeline run; expanded as
            keyword arguments to ``pipe.evaluate``. Its optional
            ``"metrics"`` entry is merged over the evaluation output.

    Returns:
        A ``gr.update`` whose value is one ``key: value`` line per metric,
        or an empty string when either input is missing.
    """
    if not (audio_path and results_data):
        return gr.update(value="")

    _ensure_pipeline(config)
    metrics = pipe.evaluate(audio_path, **results_data)
    # Metrics already stored with the run override freshly computed ones.
    metrics.update(results_data.get("metrics", {}))

    report = "\n".join(f"{name}: {score}" for name, score in metrics.items())
    return gr.update(value=report)
@spaces.GPU(duration=120)
def update_asr_model_in_pipeline(config, asr_model):
    """Switch the shared pipeline to ``asr_model``.

    Builds the pipeline from ``config`` first if it does not exist yet.
    Returns a ``gr.update`` carrying the selected model name.
    """
    _ensure_pipeline(config)
    pipe.set_asr_model(asr_model)
    selection = gr.update(value=asr_model)
    return selection
@spaces.GPU(duration=120)
def update_llm_model_in_pipeline(config, llm_model):
    """Switch the shared pipeline to ``llm_model``.

    Builds the pipeline from ``config`` first if it does not exist yet.
    Returns a ``gr.update`` carrying the selected model name.
    """
    _ensure_pipeline(config)
    pipe.set_llm_model(llm_model)
    selection = gr.update(value=llm_model)
    return selection
@spaces.GPU(duration=120)
def update_svs_model_in_pipeline(config, svs_model_path):
    """Point the shared pipeline at the SVS model at ``svs_model_path``.

    Builds the pipeline from ``config`` first if it does not exist yet.
    Returns an empty ``gr.update()``, i.e. one that sets no value.
    """
    _ensure_pipeline(config)
    pipe.set_svs_model(svs_model_path)
    no_change = gr.update()
    return no_change
@spaces.GPU(duration=120)
def update_melody_source_in_pipeline(config, melody_source):
    """Set the shared pipeline's melody controller to ``melody_source``.

    Builds the pipeline from ``config`` first if it does not exist yet.
    Returns a ``gr.update`` carrying the selected melody source.
    """
    _ensure_pipeline(config)
    pipe.set_melody_controller(melody_source)
    selection = gr.update(value=melody_source)
    return selection
 class GradioInterface:
     def __init__(self, options_config: str, default_config: str):
         self.options = self.load_config(options_config)
         self.current_voice = self.svs_model_map[self.current_svs_model]["voices"][
             self.character_info[self.current_character].default_voice
         ]
         self.results = None
     def load_config(self, path: str):
                     fn=self.update_voice, inputs=voice_radio, outputs=voice_radio
                 )
                 mic_input.change(
+                    fn=self._run_pipeline_wrapper,
                     inputs=mic_input,
                     outputs=[interaction_log, audio_output],
                 )
                 metrics_button.click(
+                    fn=self._update_metrics_wrapper,
                     inputs=audio_output,
                     outputs=[metrics_output],
                 )
             return demo
+        except Exception:
             import traceback
             print(traceback.format_exc())
         )
     def update_asr_model(self, asr_model):
         """Record the chosen ASR model in the config and apply it to the pipeline.

         Returns the ``gr.update`` produced by ``update_asr_model_in_pipeline``.
         """
         config = self.default_config
         # Stored in the config as well, so a pipeline built later from this
         # config is constructed with the same choice.
         config["asr_model"] = asr_model
         return update_asr_model_in_pipeline(config, asr_model)
     def update_llm_model(self, llm_model):
         """Record the chosen LLM in the config and apply it to the pipeline.

         Returns the ``gr.update`` produced by ``update_llm_model_in_pipeline``.
         """
         config = self.default_config
         # Stored in the config as well, so a pipeline built later from this
         # config is constructed with the same choice.
         config["llm_model"] = llm_model
         return update_llm_model_in_pipeline(config, llm_model)
     def update_svs_model(self, svs_model):
         self.current_svs_model = svs_model
         self.current_voice = self.svs_model_map[self.current_svs_model]["voices"][
             character_voice
         ]
+        svs_model_path = self.svs_model_map[self.current_svs_model]["model_path"]
+        self.default_config["svs_model"] = svs_model_path
+        update_svs_model_in_pipeline(self.default_config, svs_model_path)
         return (
             gr.update(value=svs_model),
             gr.update(
     def update_melody_source(self, melody_source):
         """Remember the chosen melody source and apply it to the pipeline.

         Updates ``self.current_melody_source`` and the ``"melody_source"``
         config entry, then returns the ``gr.update`` produced by
         ``update_melody_source_in_pipeline``.
         """
         self.current_melody_source = melody_source
         config = self.default_config
         config["melody_source"] = melody_source
         return update_melody_source_in_pipeline(config, melody_source)
     def update_voice(self, voice):
         """Select ``voice`` from the current SVS model's voice table.

         Stores the looked-up entry in ``self.current_voice`` and returns a
         ``gr.update`` carrying the voice key that was passed in.
         """
         available_voices = self.svs_model_map[self.current_svs_model]["voices"]
         self.current_voice = available_voices[voice]
         return gr.update(value=voice)
+    def _run_pipeline_wrapper(self, audio_path):
+        log_update, audio_update, pipeline_results = run_pipeline(
+            audio_path,
+            self.default_config,
+            self.svs_model_map[self.current_svs_model],
+            self.character_info[self.current_character].prompt,
+            self.current_voice,
+        )
+        if pipeline_results:
+            self.results = pipeline_results
+        return log_update, audio_update
+    def _update_metrics_wrapper(self, audio_path):
+        return update_metrics(audio_path, self.default_config, self.results or {})