# Copyright (c) Wedefense Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Gradio demo that localizes fake (spoofed) regions in an uploaded audio file.

On start-up the script installs the bundled ``wedefense`` wheel, loads the
``localization_MFHA_xlsr`` model and serves a web UI that returns the model's
prediction as RTTM text.
"""

import importlib
import os
import subprocess
import sys

import gradio as gr
# NOTE(review): `easy_install` is not referenced anywhere in this file; it is
# kept only because removing a file-level import is out of scope here.
from setuptools.command import easy_install


def install_local_package(package_path: str) -> bool:
    """Install a local package (e.g. a wheel file) with pip.

    The install runs in a subprocess using the current interpreter, so the
    package lands in the environment this script is running in.

    Args:
        package_path: Path to the package; resolved to an absolute path
            before being handed to pip.

    Returns:
        True if pip exited successfully, False otherwise. Failures are
        reported on stdout and never raised, so the caller can decide
        whether a failed install is fatal.
    """
    absolute_path = os.path.abspath(package_path)
    try:
        # Argument list (no shell) so the path is never shell-interpreted.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", absolute_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Failed: {absolute_path}")
        print("Error message:", e.stderr)
        return False
    except Exception as e:
        # Best effort: e.g. the interpreter itself could not be spawned.
        print(f"unkown error: {e}")
        return False
    print(f"install: {absolute_path}")
    return True


# The wheel is installed at start-up; the import below is done dynamically
# because the package may not exist until the install above has run.
install_local_package("./wedefense-0.0.0-py3-none-any.whl")
wedefense = importlib.import_module("wedefense")
if hasattr(wedefense, "__version__"):
    print(f"wedefense version: {wedefense.__version__}")

# pretrain_model_path = "./pretrained_model/"
model = wedefense.load_model(model_id="localization_MFHA_xlsr")


def speaker_extraction(audio, audio_name):
    """Run fake-audio localization on one file and return the result as RTTM.

    Despite its name, this callback performs localization, not speaker
    extraction; the name is kept because the interface below refers to it.

    Args:
        audio: Path of the uploaded audio file, or None when nothing was
            uploaded.
        audio_name: Identifier passed to ``model.logits_to_rttm`` for the
            RTTM lines. Surrounding whitespace is stripped.

    Returns:
        The RTTM lines joined with newlines. When an input is missing, a
        Gradio warning is issued and the result of ``gr.Warning`` is
        returned instead.
    """
    if audio is None:
        return gr.Warning(
            "The audio file cannot be empty, please upload a valid audio "
            "file. 音频文件不能为空,请上传有效的音频文件。"
        )
    # Treat None and whitespace-only names like an empty name: RTTM fields
    # are whitespace separated, so a blank identifier would corrupt the line.
    audio_name = (audio_name or "").strip()
    if not audio_name:
        return gr.Warning(
            "The audio name cannot be empty, please enter the audio name. "
            "音频名称不能为空,请输入音频名称。"
        )

    logits = model.localization_logits(audio)
    rttm = model.logits_to_rttm(
        logits, audio_name, score_reso=20, bonafide_idx=0)
    return "\n".join(rttm)


inputs = [
    gr.Audio(type="filepath", label='Input audio', show_download_button=True),
    gr.Textbox(
        lines=1,
        placeholder="Enter the ...",
        label="Audio name",
    ),
]

# output = gr.Audio(type="filepath",label="Extract Speaker")
output = gr.Textbox(label="RTTM", )

# NOTE(review): the nine placeholder names after "Wedefense" follow the NIST
# RTTM field convention; confirm them against what model.logits_to_rttm
# actually emits. Backslashes are written as "\\" so the runtime text keeps a
# literal "\<" (Markdown escape) without relying on an invalid Python escape.
description = (
    "Fake audio localization demo!<br>"
    "Each line of the output RTTM file contains ten fields and follows the "
    "format:<br>"
    "Wedefense \\<file-id\\> \\<channel\\> \\<onset\\> \\<duration\\> "
    "\\<ortho\\> \\<stype\\> \\<label\\> \\<conf\\> \\<slat\\>"
)

# Each example pairs an audio path with the name to use in the RTTM output.
examples = [
    ['examples/CON_E_0000000.wav', 'CON_E_0000000'],
    ['examples/CON_E_0000006.wav', 'CON_E_0000006'],
    ['examples/CON_E_0000008.wav', 'CON_E_0000008'],
    ['examples/LA_E_9992215.wav', 'LA_E_9992215'],
]

interface = gr.Interface(
    fn=speaker_extraction,
    inputs=inputs,
    outputs=output,
    title="WeDefense | Fake Audio Localization Demo",
    description=description,
    # article=article,
    examples=examples)

interface.launch()