# Copyright (c) Wedefense Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gradio as gr
import subprocess
import sys
import os
import importlib
from setuptools.command import easy_install


def install_local_package(package_path):
    """Install a local package (e.g. a wheel file) with pip.

    pip is run in a subprocess through the current interpreter
    (``sys.executable -m pip install <path>``), so the package is installed
    into the environment this script is running in.

    Args:
        package_path: Path of the package to install. Relative paths are
            resolved against the current working directory.

    Returns:
        True if pip exited successfully, False otherwise. A missing path or
        a pip failure is reported on stdout instead of being raised.
    """
    absolute_path = os.path.abspath(package_path)

    # Fail fast on a missing path rather than spawning pip only to have it
    # report the same problem.
    if not os.path.exists(absolute_path):
        print(f"Failed: {absolute_path}")
        print("Error message:", "path does not exist")
        return False

    try:
        # check=True turns a non-zero pip exit status into
        # CalledProcessError; output is captured so stderr can be shown.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", absolute_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Failed: {absolute_path}")
        print("Error message:", e.stderr)
        return False
    except Exception as e:
        # Best-effort boundary: anything else (e.g. the interpreter could
        # not be spawned) is reported and signalled through the return value.
        print(f"unkown error: {e}")
        return False
    else:
        print(f"install: {absolute_path}")
        return True


# Install the bundled wedefense wheel into the running interpreter before
# importing it. The boolean result is ignored here, so a failed install only
# shows up as an import error on the next statement (unless the package is
# already installed).
install_local_package("./wedefense-0.0.0-py3-none-any.whl")

# Imported only after the install step above has run.
wedefense = importlib.import_module("wedefense")
# Log the package version when the module exposes one.
if hasattr(wedefense, "__version__"):
    print(f"wedefense version: {wedefense.__version__}")

# pretrain_model_path = "./pretrained_model/"

# Load the localization model once at import time; the Gradio callback below
# reuses this module-level instance for every request.
model = wedefense.load_model(model_id="localization_MFHA_xlsr")


def speaker_extraction(audio, audio_name):
    """Run fake-audio localization on one file and return the result as RTTM.

    Despite its name, this is the Gradio callback of the localization demo:
    it computes localization logits for the audio with the module-level
    ``model`` and converts them into RTTM lines.

    Args:
        audio: Path of the audio file to analyse, or None when no file was
            provided.
        audio_name: Recording id to write into the RTTM lines. Surrounding
            whitespace is stripped, since the fields of an RTTM line are
            space-separated.

    Returns:
        The RTTM lines joined with newlines, or None (after a warning has
        been shown to the user) when the audio or the name is missing.
    """
    if audio is None:
        # gr.Warning displays a notification in the UI; it is called for its
        # side effect, and the callback then returns an empty result.
        gr.Warning(
            "The audio file cannot be empty, please upload a valid audio file. 音频文件不能为空，请上传有效的音频文件。"
        )
        return None

    # Treat None and whitespace-only names the same as an empty name.
    record_id = audio_name.strip() if audio_name else ""
    if not record_id:
        gr.Warning(
            "The audio name cannot be empty, please enter the audio name. 音频名称不能为空，请输入音频名称。"
        )
        return None

    logits = model.localization_logits(audio)

    # NOTE(review): score_reso=20 and bonafide_idx=0 are passed through
    # unchanged; their meaning is defined by wedefense (presumably the score
    # resolution and the index of the bona fide class) - confirm.
    rttm = model.logits_to_rttm(logits,
                                record_id,
                                score_reso=20,
                                bonafide_idx=0)
    return "\n".join(rttm)


# Input components, in the positional order the callback receives them:
# the audio clip (configured with type="filepath") and its recording id.
inputs = [
    gr.Audio(
        label="Input audio",
        type="filepath",
        show_download_button=True,
    ),
    gr.Textbox(
        label="Audio name",
        placeholder="Enter the <recoid> ...",
        lines=1,
    ),
]

# Single text output holding the RTTM string returned by the callback.
output = gr.Textbox(label="RTTM")

# Description text passed to the Gradio interface. The angle brackets around
# the field names are backslash-escaped (presumably so they are displayed
# literally instead of being parsed as HTML tags - confirm). Those fragments
# are raw strings: "\<" and "\>" are not valid Python escape sequences and
# trigger a warning in a normal string literal. The runtime value is
# unchanged.
description = (
    "Fake audio localization demo!<br>"
    "Each line of the output RTTM file contains ten fields and follows the format:<br>"
    r"Wedefense \<recoid\> \<channelid\>  \<start_time\> \<duration\> "
    r"\<NA\> \<NA\> \<predicted_label\> \<NA\> \<NA\>"
)

# Example rows offered in the UI. Each row supplies the two inputs in order:
# [audio file path, audio name]; the name is the file name without ".wav".
examples = [
    [f"examples/{clip_id}.wav", clip_id]
    for clip_id in (
        "CON_E_0000000",
        "CON_E_0000006",
        "CON_E_0000008",
        "LA_E_9992215",
    )
]

# Wire the callback, its input/output components, the descriptive text and
# the example rows into a single Gradio interface.
interface = gr.Interface(
    title="WeDefense | Fake Audio Localization Demo",
    description=description,
    fn=speaker_extraction,
    inputs=inputs,
    outputs=output,
    examples=examples,
)

# Launch the demo as soon as this module is executed.
interface.launch()