# Hosting-page metadata captured with this file: last commit "fix error" (881bb12) by cdliang11; raw / history / blame links; 3.43 kB.
# Copyright (c) Wedefense Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gradio as gr
import subprocess
import sys
import os
import importlib
from setuptools.command import easy_install
def install_local_package(package_path):
    """Install a local package (e.g. a wheel file) with pip.

    pip is run as ``<current interpreter> -m pip install <absolute path>``,
    with its output captured rather than echoed.

    Args:
        package_path: Path to the package to install. Relative paths are
            resolved against the current working directory.

    Returns:
        True if pip exited successfully. False if the path does not exist,
        pip exited with a non-zero status, or any other error occurred while
        running it.
    """
    absolute_path = os.path.abspath(package_path)

    # Fail fast with a clear reason instead of spawning pip for a path that
    # cannot possibly be installed.
    if not os.path.exists(absolute_path):
        print(f"Failed: {absolute_path}")
        print("Error message:", "package path does not exist")
        return False

    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", absolute_path],
            capture_output=True,
            text=True,
            check=True,  # non-zero pip exit status raises CalledProcessError
        )
    except subprocess.CalledProcessError as e:
        print(f"Failed: {absolute_path}")
        print("Error message:", e.stderr)
        return False
    except Exception as e:
        # Best-effort installer: report anything unexpected (for example the
        # interpreter could not be launched) rather than crash the caller.
        print(f"unkown error: {e}")
        return False

    print(f"install: {absolute_path}")
    return True
# Install the bundled wheel at startup, then import the package by name.
install_local_package("./wedefense-0.0.0-py3-none-any.whl")
# The package may have been installed after this interpreter started, so the
# import system's cached directory listings can be stale; refresh them so the
# freshly installed module is found.
importlib.invalidate_caches()
wedefense = importlib.import_module("wedefense")
if hasattr(wedefense, "__version__"):
    print(f"wedefense version: {wedefense.__version__}")
# pretrain_model_path = "./pretrained_model/"
# Model shared by every request handled by the demo callback.
model = wedefense.load_model(model_id="localization_MFHA_xlsr")
def speaker_extraction(audio, audio_name):
    """Run fake-audio localization on one recording and return RTTM text.

    Args:
        audio: The uploaded audio (the audio input widget is configured with
            ``type="filepath"``), or None when nothing was uploaded.
        audio_name: Recording id passed to ``model.logits_to_rttm``.

    Returns:
        The RTTM lines produced by the model joined with newlines, or the
        result of ``gr.Warning`` when the audio or its name is missing.
    """
    if audio is None:
        return gr.Warning(
            "The audio file cannot be empty, please upload a valid audio file. 音频文件不能为空,请上传有效的音频文件。"
        )
    # Treat a missing or whitespace-only name like an empty one: it would
    # otherwise end up as a blank field in the RTTM output.
    if not audio_name or not audio_name.strip():
        return gr.Warning(
            "The audio name cannot be empty, please enter the audio name. 音频名称不能为空,请输入音频名称。"
        )

    logits = model.localization_logits(audio)
    rttm = model.logits_to_rttm(
        logits,
        audio_name,
        score_reso=20,
        bonafide_idx=0,
    )
    return "\n".join(rttm)
# Input widgets, listed in the order the callback receives its arguments:
# first the audio upload (handed over as a file path), then the one-line
# text box for the recording name.
_audio_input = gr.Audio(
    type="filepath",
    label='Input audio',
    show_download_button=True,
)
_name_input = gr.Textbox(
    lines=1,
    placeholder="Enter the <recoid> ...",
    label="Audio name",
)
inputs = [_audio_input, _name_input]

# Single text output that displays the generated RTTM lines.
output = gr.Textbox(label="RTTM")
# # description
description = (
"Fake audio localization demo!<br>Each line of the output RTTM file contains ten fields and follows the format:<br>Wedefense \<recoid\> \<channelid\> \<start_time\> \<duration\> \<NA\> \<NA\> \<predicted_label\> \<NA\> \<NA\>"
)
# Sample rows offered by the demo. Each row is [wav path, recording name],
# in the same order as the two input widgets; the wav files live under
# "examples/" and are named after their recording.
examples = [
    [f"examples/{record_id}.wav", record_id]
    for record_id in (
        "CON_E_0000000",
        "CON_E_0000006",
        "CON_E_0000008",
        "LA_E_9992215",
    )
]
# Assemble the Gradio app: page text first, then the callback with its
# input/output widgets, then the sample rows.
interface = gr.Interface(
    title="WeDefense | Fake Audio Localization Demo",
    description=description,
    fn=speaker_extraction,
    inputs=inputs,
    outputs=output,
    examples=examples,
)

# Start serving the demo as soon as the script runs.
interface.launch()