|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
import subprocess |
|
import sys |
|
import os |
|
import importlib |
|
from setuptools.command import easy_install |
|
|
|
|
|
def install_local_package(package_path):
    """Install a local package with pip and report whether it succeeded.

    pip is invoked as ``sys.executable -m pip install <path>``, i.e. with the
    interpreter that is running this script. Progress and failures are
    reported with ``print``; no exception escapes this function.

    Args:
        package_path: Path of the package to install. Relative paths are
            resolved against the current working directory.

    Returns:
        True if pip exited with status 0. False if the path does not exist,
        pip exited with a non-zero status, or pip could not be run at all.
    """
    absolute_path = os.path.abspath(package_path)

    # Fail fast on a missing path instead of spawning pip just to be told so.
    if not os.path.exists(absolute_path):
        print(f"Failed: {absolute_path}")
        print("Error message:", "path does not exist")
        return False

    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", absolute_path],
            capture_output=True,
            text=True,
            check=True,  # a non-zero pip exit status raises CalledProcessError
        )
    except subprocess.CalledProcessError as e:
        print(f"Failed: {absolute_path}")
        print("Error message:", e.stderr)
        return False
    except Exception as e:
        # Best-effort boundary: anything else (e.g. the interpreter could not
        # be spawned) is reported and turned into a False result.
        print(f"unknown error: {e}")
        return False

    print(f"install: {absolute_path}")
    return True
|
|
|
|
|
# Install the bundled wedefense wheel with the running interpreter. The return
# value is not checked, so a failed install only surfaces if the import below
# fails.
install_local_package("./wedefense-0.0.0-py3-none-any.whl")

# The package may have appeared on disk after the interpreter started, so
# clear the import system's finder caches before importing it.
importlib.invalidate_caches()

wedefense = importlib.import_module("wedefense")
if hasattr(wedefense, "__version__"):
    print(f"wedefense version: {wedefense.__version__}")

# Load the localization model once at module import; speaker_extraction reads
# this module-level object on every call.
model = wedefense.load_model(model_id="localization_MFHA_xlsr")
|
|
|
|
|
def speaker_extraction(audio, audio_name):
    """Run fake-audio localization on one audio file and return RTTM text.

    Args:
        audio: The uploaded audio as provided by the ``gr.Audio`` input
            (configured with ``type="filepath"``), or None when nothing was
            uploaded.
        audio_name: Name passed to ``model.logits_to_rttm`` together with the
            logits. Must contain at least one non-whitespace character.

    Returns:
        The lines produced by ``model.logits_to_rttm`` joined with newlines.
        When validation fails, the result of ``gr.Warning(...)`` is returned
        instead and the model is not called.
    """
    if audio is None:
        return gr.Warning(
            "The audio file cannot be empty, please upload a valid audio file. 音频文件不能为空,请上传有效的音频文件。"
        )
    # Treat a missing (None) or whitespace-only name the same as an empty one.
    if audio_name is None or not audio_name.strip():
        return gr.Warning(
            "The audio name cannot be empty, please enter the audio name. 音频名称不能为空,请输入音频名称。"
        )

    logits = model.localization_logits(audio)

    # NOTE(review): score_reso=20 is presumably the score resolution and
    # bonafide_idx=0 presumably the index of the bona fide class — confirm
    # against the wedefense API.
    rttm = model.logits_to_rttm(
        logits,
        audio_name,
        score_reso=20,
        bonafide_idx=0,
    )
    return "\n".join(rttm)
|
|
|
|
|
# Input widgets, listed in the order speaker_extraction takes its arguments:
# first the audio (handed over as a file path), then the audio name.
audio_input = gr.Audio(
    type="filepath",
    label="Input audio",
    show_download_button=True,
)
name_input = gr.Textbox(
    lines=1,
    placeholder="Enter the <recoid> ...",
    label="Audio name",
)
inputs = [audio_input, name_input]

# Single text box that shows the returned RTTM text.
output = gr.Textbox(label="RTTM")
|
|
|
|
|
# Text shown under the demo title. The field list is a raw string so the
# backslashes before "<" and ">" (presumably Markdown escapes that keep the
# field names from being parsed as HTML tags) are kept literally, without
# relying on Python passing through the invalid "\<" / "\>" escape sequences.
description = (
    "Fake audio localization demo!<br>"
    "Each line of the output RTTM file contains ten fields and follows the format:<br>"
    r"Wedefense \<recoid\> \<channelid\> \<start_time\> \<duration\> \<NA\> \<NA\> \<predicted_label\> \<NA\> \<NA\>"
)
|
|
|
# Example rows for the demo: each row is [audio file path, audio name], with
# the path derived from the record id.
examples = [
    [f"examples/{record_id}.wav", record_id]
    for record_id in (
        "CON_E_0000000",
        "CON_E_0000006",
        "CON_E_0000008",
        "LA_E_9992215",
    )
]
|
|
|
# Assemble the demo page: page texts first, then the callback and its
# input/output widgets, and finally the example rows.
interface = gr.Interface(
    title="WeDefense | Fake Audio Localization Demo",
    description=description,
    fn=speaker_extraction,
    inputs=inputs,
    outputs=output,
    examples=examples,
)

# Start the Gradio app as soon as the script is executed.
interface.launch()
|
|