import gradio as gr
import torch
import os
import pandas as pd
from types import SimpleNamespace

from extractor.extract_rf_feats import VideoDataset_feature
from extractor.extract_slowfast_clip import SlowFast, extract_features_slowfast_pool
from extractor.extract_swint_clip import SwinT, extract_features_swint_pool
from model_regression import Mlp, preprocess_data
from demo_test import evaluate_video_quality, load_model, get_transform


# Swin-T constructor kwargs and the MLP input width for each supported network.
_NETWORK_VARIANTS = {
    'diva-vqa': (
        {'global_pool': 'avg'},  # 'swin_base_patch4_window7_224.ms_in22k_ft_in1k'
        9984,
    ),
    'diva-vqa_large': (
        {'model_name': 'swin_large_patch4_window7_224', 'global_pool': 'avg', 'pretrained': True},
        11520,
    ),
}


def run_diva_vqa(video_path, is_finetune, train_data_name, test_data_name, network_name):
    """Predict the quality score of one uploaded video and format it for display.

    Args:
        video_path: Filesystem path of the uploaded video. May be ``None`` or
            empty when nothing was uploaded.
        is_finetune: Stored on the run config as ``is_finetune``.
        train_data_name: Stored on the run config as ``train_data_name``.
        test_data_name: Stored on the run config and passed to the dataset.
        network_name: Key of ``_NETWORK_VARIANTS`` selecting the Swin-T
            backbone and the matching MLP input width.

    Returns:
        A user-facing string: the predicted score and runtime on success, or
        a message starting with "❌" when the input is missing or any step
        of the pipeline fails.
    """
    # Gradio passes None when no file was uploaded, and os.path.exists(None)
    # raises TypeError, so test for a missing value before touching the disk.
    if not video_path or not os.path.exists(video_path):
        return "❌ No video uploaded or the uploaded file has expired. Please upload again."

    # Everything below runs inside the try so that setup failures (dataset,
    # model loading) are reported to the user and the upload is still cleaned up.
    try:
        if network_name not in _NETWORK_VARIANTS:
            supported = ", ".join(_NETWORK_VARIANTS)
            raise ValueError(f"Unsupported network '{network_name}'. Choose one of: {supported}.")
        swint_kwargs, input_features = _NETWORK_VARIANTS[network_name]

        config = SimpleNamespace(**{
            'select_criteria': 'byrmse',
            'is_finetune': is_finetune,
            'save_path': 'model/',
            'train_data_name': train_data_name,
            'test_data_name': test_data_name,
            'test_video_path': video_path,
            'network_name': network_name,
            'num_workers': 0,
            'resize': 224,
            'patch_size': 16,
            'target_size': 224,
            'model_name': 'Mlp',
        })
        print(config.test_video_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # test demo video
        resize_transform = get_transform(config.resize)
        patches_per_side = config.target_size // config.patch_size
        top_n = patches_per_side * patches_per_side
        test_df = pd.DataFrame({
            'vid': [os.path.splitext(os.path.basename(config.test_video_path))[0]],
            'test_data_name': [config.test_data_name],
            'test_video_path': [config.test_video_path],
        })
        videos_dir = os.path.dirname(config.test_video_path)
        print(test_df.T)

        dataset = VideoDataset_feature(
            videos_dir, test_df, resize_transform, config.resize,
            config.test_data_name, config.patch_size, config.target_size, top_n,
        )
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=1,
            shuffle=False,
            # os.cpu_count() may return None, which min() cannot compare with an int.
            num_workers=min(config.num_workers, os.cpu_count() or 1),
            # Pinned memory only helps host-to-GPU copies; skip it on CPU-only hosts.
            pin_memory=device.type == "cuda",
        )

        # load models to device
        model_slowfast = SlowFast().to(device)
        model_swint = SwinT(**swint_kwargs).to(device)
        model_mlp = load_model(config, device, input_features)

        score, runtime = evaluate_video_quality(config, data_loader, model_slowfast, model_swint, model_mlp, device)
        return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
    except Exception as e:
        return f"❌ Error: {str(e)}"
    finally:
        # Only delete files whose path contains "gradio" (the uploaded temp copy).
        if "gradio" in str(video_path) and os.path.exists(video_path):
            try:
                os.remove(video_path)
            except OSError as cleanup_error:
                # Best-effort cleanup: never let it replace the result above.
                print(f"Could not remove temporary video {video_path}: {cleanup_error}")


# Gradio UI: one video upload plus the options that are passed, in order,
# as the arguments of run_diva_vqa.
demo = gr.Interface(
    fn=run_diva_vqa,
    inputs=[
        gr.Video(label="Upload a Video （e.g. mp4）"),
        gr.Checkbox(label="Use Finetuning?", value=False),
        gr.Dropdown(label="Train Dataset Name", choices=["konvid_1k", "youtube_ugc", "live_vqc", "lsvq_train", "other"], value="lsvq_train"),
        gr.Dropdown(label="Test Dataset Name", choices=["konvid_1k", "youtube_ugc", "live_vqc", "lsvq", "other"], value="konvid_1k"),
        gr.Dropdown(label="Our Models", choices=["diva-vqa", "diva-vqa_large"], value="diva-vqa_large")
    ],
    outputs=gr.Textbox(label="Predicted Perceptual Quality Score (0–100)"),

    title="🎬 DIVA-VQA Online Demo",
    description=(
        "Upload a short video and get the predicted perceptual quality score using the DIVA-VQA model. "
        "You can try our demo video from the "
        "<a href='https://huggingface.co/spaces/xinyiW915/DIVA-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. "
        "<br><br>"
        "⚙️ This demo is currently running on <strong>Hugging Face CPU Basic</strong>: 2 vCPU • 16 GB RAM."
        # Alternative hardware note, swap in when deployed on ZeroGPU:
        # "⚙️ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100)."
    ),
)

# Launch only when run as a script, so importing this module (for tests or
# tooling) builds the interface without starting a blocking web server.
if __name__ == "__main__":
    demo.launch()