Spaces:

wedefense
/

fake_audio_localization_demo

Sleeping

cdliang11

fix error

881bb12 3 months ago

3.43 kB

	# Copyright (c) Wedefense Team. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import gradio as gr
	import subprocess
	import sys
	import os
	import importlib
	from setuptools.command import easy_install


	def install_local_package(package_path) -> bool:
	    """Install a local package into the running interpreter's environment with pip.

	    The path is made absolute and handed to ``<sys.executable> -m pip install``.
	    Progress and failures are reported on stdout; no exception escapes.

	    Args:
	        package_path: Path to the thing pip should install (the caller in
	            this file passes a ``.whl`` file). May be relative.

	    Returns:
	        True when pip exits successfully; False when the path does not
	        exist, pip exits with a non-zero status, or any other error occurs
	        while launching pip.
	    """
	    absolute_path = os.path.abspath(package_path)

	    # Fail fast with a clear message instead of paying for a pip subprocess
	    # that can only fail on a missing path.
	    if not os.path.exists(absolute_path):
	        print(f"Failed: {absolute_path}")
	        print("Error message:", "path does not exist")
	        return False

	    try:
	        # check=True turns a non-zero pip exit status into CalledProcessError;
	        # capture_output/text make pip's stderr available as a str on it.
	        subprocess.run(
	            [sys.executable, "-m", "pip", "install", absolute_path],
	            capture_output=True,
	            text=True,
	            check=True,
	        )
	    except subprocess.CalledProcessError as e:
	        print(f"Failed: {absolute_path}")
	        print("Error message:", e.stderr)
	        return False
	    except Exception as e:
	        # Deliberate catch-all: installation is best-effort at app start-up,
	        # so report the problem and let the caller decide what to do.
	        print(f"unkown error: {e}")
	        return False
	    else:
	        print(f"install: {absolute_path}")
	        return True


	# Install the wheel shipped next to this script before importing from it.
	# The return value is intentionally not checked: if the package is already
	# present in the environment the import below still succeeds.
	install_local_package("./wedefense-0.0.0-py3-none-any.whl")

	# The package may have been installed after this interpreter started, so
	# refresh the import system's finder caches before the first import attempt.
	importlib.invalidate_caches()
	wedefense = importlib.import_module("wedefense")
	if hasattr(wedefense, "__version__"):
	    print(f"wedefense version: {wedefense.__version__}")

	# pretrain_model_path = "./pretrained_model/"

	# Loaded once at import time and shared by every request handled below.
	model = wedefense.load_model(model_id="localization_MFHA_xlsr")


	def speaker_extraction(audio, audio_name):
	    """Run the localization model on one audio file and return RTTM text.

	    Args:
	        audio: The audio input as delivered by the Gradio ``Audio``
	            component, or None when nothing was uploaded.
	        audio_name: Recording id passed to ``model.logits_to_rttm``.
	            None, empty and whitespace-only values are rejected.

	    Returns:
	        The RTTM lines produced by the model joined with newlines. When an
	        input is missing, the result of ``gr.Warning(...)`` is returned
	        instead (presumably None, leaving the output box empty -- confirm
	        against the installed Gradio version).
	    """
	    # Identity test: None is a singleton, and ``==`` could be overridden.
	    if audio is None:
	        return gr.Warning(
	            "The audio file cannot be empty, please upload a valid audio file. 音频文件不能为空，请上传有效的音频文件。"
	        )
	    # Also reject None and whitespace-only names, which would otherwise
	    # reach the model and yield an RTTM with a blank recording id.
	    if audio_name is None or not audio_name.strip():
	        return gr.Warning(
	            "The audio name cannot be empty, please enter the audio name. 音频名称不能为空，请输入音频名称。"
	        )

	    logits = model.localization_logits(audio)

	    # NOTE(review): score_reso and bonafide_idx are passed through as-is;
	    # their exact meaning is defined by wedefense -- confirm there.
	    rttm = model.logits_to_rttm(
	        logits,
	        audio_name,
	        score_reso=20,
	        bonafide_idx=0,
	    )
	    return "\n".join(rttm)


	# Input widgets: the audio file to analyse and the recording id that is
	# forwarded to speaker_extraction as its second argument.
	inputs = [
	    gr.Audio(type="filepath", label='Input audio', show_download_button=True),
	    gr.Textbox(
	        lines=1,
	        placeholder="Enter the <recoid> ...",
	        label="Audio name",
	    ),
	]

	# output = gr.Audio(type="filepath",label="Extract Speaker")
	output = gr.Textbox(label="RTTM")

	# description
	# Raw strings: the backslashes before "<" and ">" are literal text, and in
	# a normal string literal "\<" is an invalid escape sequence that triggers
	# a warning on recent Python versions. The runtime text is unchanged.
	description = (
	    r"Fake audio localization demo!<br>"
	    r"Each line of the output RTTM file contains ten fields and follows the format:<br>"
	    r"Wedefense \<recoid\> \<channelid\> \<start_time\> \<duration\> \<NA\> \<NA\> \<predicted_label\> \<NA\> \<NA\>"
	)

	# Each example row is [audio file path, audio name], matching ``inputs``.
	examples = [
	    ['examples/CON_E_0000000.wav', 'CON_E_0000000'],
	    ['examples/CON_E_0000006.wav', 'CON_E_0000006'],
	    ['examples/CON_E_0000008.wav', 'CON_E_0000008'],
	    ['examples/LA_E_9992215.wav', 'LA_E_9992215'],
	]

	interface = gr.Interface(
	    fn=speaker_extraction,
	    inputs=inputs,
	    outputs=output,
	    # Raw string for the same reason as ``description``: "\|" is kept as
	    # literal text without relying on an invalid escape sequence.
	    title=r"WeDefense \| Fake Audio Localization Demo",
	    description=description,
	    # article=article,
	    examples=examples,
	)

	interface.launch()