import gradio as gr, pandas as pd, zipfile, tempfile, shutil, pathlib, torch
from utmosv2_batch_predict import compute_spec, MAX_LEN         # reuse the function
from utmosv2.utils          import get_model
from types import SimpleNamespace
import importlib, torchaudio, numpy as np, torch.nn as nn

# Prefer the GPU whenever torch reports one; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# --- build the UTMOSv2 model a single time, at import ---
# The config module is flattened into a namespace that carries each of its
# non-underscore attributes, so individual fields can be overridden below.
cfg_mod = importlib.import_module("utmosv2.config.fusion_stage3")
cfg = SimpleNamespace(
    **{
        name: getattr(cfg_mod, name)
        for name in dir(cfg_mod)
        if not name.startswith("_")
    }
)
cfg.phase = "test"
cfg.data_config = None
cfg.print_config = False
cfg.weight = "YOUR_FINE_TUNED_WEIGHT.ckpt"  # put the file in the repo
model = get_model(cfg, DEVICE).eval()
# Spectrogram settings forwarded to compute_spec() for every waveform.
specs_cfg = cfg.dataset.specs

def _upload_path(upload):
    """Return the filesystem path behind a Gradio file upload.

    The callback may receive either a plain path or a file-like wrapper that
    exposes the path as ``.name`` (which one depends on the Gradio version and
    the component's ``type`` setting), so both forms are accepted.
    """
    if upload is None:
        raise ValueError("Both a CSV file and a ZIP archive must be uploaded.")
    # pathlib paths also have a ``.name`` (the basename), so test for them first.
    if isinstance(upload, (str, pathlib.PurePath)):
        return upload
    return getattr(upload, "name", upload)


def _load_fixed_length_wav(path):
    """Load *path* as a 1-D 16 kHz waveform of exactly MAX_LEN samples on DEVICE."""
    wav, sr = torchaudio.load(path)
    if sr != 16_000:
        wav = torchaudio.transforms.Resample(sr, 16_000)(wav)
    wav = wav[0]  # keep the first channel only
    n_samples = wav.numel()
    if n_samples > MAX_LEN:
        wav = wav[:MAX_LEN]
    else:
        # right-pad with zeros up to the fixed length
        wav = nn.functional.pad(wav, (0, MAX_LEN - n_samples))
    return wav.to(DEVICE)


def run_space(csv_file, wav_zip, num_domains):
    """
    Predict a MOS for every audio file listed in a CSV.

    Each waveform is scored once per domain index in range(num_domains)
    (passed to the model as a one-hot vector) and the scores are averaged.

    Inputs:
        csv_file    – csv with 'audio' (path relative to the zip root) and
                      optional 'method'; all columns are passed through
        wav_zip     – zip that contains all referenced .wav files
        num_domains – how many domain ids to average over,
                      1 .. model.num_dataset
    Output:
        DataFrame shown (input rows + 'pred_mos') + path of the downloadable CSV
    Raises:
        ValueError  – an upload is missing, num_domains is out of range, or an
                      'audio' entry resolves outside the extracted archive
    """
    if num_domains is None:
        raise ValueError("Training domains must be set.")
    n_domains = int(num_domains)
    # 0 domains would average an empty list (NaN); too many would fail inside
    # one_hot with an opaque message, so reject both up front.
    if not 1 <= n_domains <= model.num_dataset:
        raise ValueError(
            f"Training domains must be between 1 and {model.num_dataset}, "
            f"got {n_domains}."
        )

    df = pd.read_csv(_upload_path(csv_file))

    # One row per domain id; independent of the audio, so build it only once.
    domain_onehots = nn.functional.one_hot(
        torch.arange(n_domains, device=DEVICE),
        num_classes=model.num_dataset,
    ).float()

    pred = []
    # ----- prepare wav directory -----
    # The context manager removes the extracted audio even if a file fails
    # to load or the model raises.
    with tempfile.TemporaryDirectory() as tempdir:
        root = pathlib.Path(tempdir).resolve()
        with zipfile.ZipFile(_upload_path(wav_zip)) as zf:
            zf.extractall(root)

        for relpath in df["audio"]:
            path = (root / relpath).resolve()
            # The CSV is user supplied: refuse absolute or '..' entries that
            # would read files outside the extracted archive.
            if root not in path.parents:
                raise ValueError(
                    f"'audio' entry {relpath!r} points outside the uploaded ZIP."
                )

            wav = _load_fixed_length_wav(path)
            spec = compute_spec(wav, specs_cfg, DEVICE)
            with torch.no_grad():
                dom_p = [
                    # [None] / [d:d + 1] add the leading batch dimension of 1
                    model(wav[None], spec[None], domain_onehots[d:d + 1]).item()
                    for d in range(n_domains)
                ]
            pred.append(float(np.mean(dom_p)))

    df["pred_mos"] = pred
    # Reserve a path that outlives this call, but close the handle before
    # pandas writes to it so no descriptor is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        out_path = tmp.name
    df.to_csv(out_path, index=False)
    return df, out_path                      # gr.File returns a link automatically

# Gradio front end: two file uploads and a domain count go into run_space();
# its DataFrame and CSV path come back as a table and a download.
demo = gr.Interface(
    fn=run_space,
    inputs=[
        gr.File(label="CSV (audio, method, MOS)", file_types=[".csv"]),
        gr.File(label="ZIP with .wav files", file_types=[".zip"]),
        gr.Number(label="Training domains", value=8, precision=0),
    ],
    outputs=[
        gr.Dataframe(label="Results"),
        gr.File(label="Download predictions CSV"),
    ],
    title="UTMOS-v2 MOS Estimator",
    description=(
        "Upload the ground-truth CSV and a ZIP containing all WAVs. "
        "The Space appends predicted MOS scores."
    ),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()