# -*- coding: utf-8 -*-
"""app.py

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
"""
import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nemo.collections.asr as nemo_asr

# Load the speech-to-text (STT) model: a pretrained NeMo CTC encoder-decoder
# identified by its model name.
stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="SungBeom/stt_kr_conformer_ctc_medium",
)

# Load the depression classifier from a local directory.
# NOTE(review): the tokenizer is fetched from the "klue/bert-base" checkpoint
# rather than from model_path -- presumably the classifier was fine-tuned from
# that base model; confirm the vocabularies match.
model_path = "./model"
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
)
tokenizer = AutoTokenizer.from_pretrained(
    "klue/bert-base",
)

# Speech-to-text conversion
def speech_to_text(audio_file):
    """Transcribe a single audio file with the module-level STT model.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The first (and only) entry of the transcription results, since
        exactly one file is submitted.
    """
    # The model takes a batch of paths, so wrap the single path in a list.
    results = stt_model.transcribe(paths2audio_files=[audio_file])
    return results[0]

# Depression classification
def predict_depression(text):
    """Classify a piece of text as depressed or not depressed.

    Args:
        text: The text to classify (in this app, the STT transcription).

    Returns:
        A display string. "Depressed (Confidence: p)" when the score for
        class index 1 exceeds 0.5, otherwise "Not Depressed (Confidence: 1-p)",
        with the confidence formatted as a percentage.
    """
    # Truncate to the tokenizer's configured maximum length so that a long
    # transcription cannot overflow the encoder's position embeddings.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): sigmoid scores each logit independently. If the model was
    # trained as a single-label two-class classifier (cross-entropy), softmax
    # over dim=-1 would be the matching normalisation -- confirm against the
    # training setup before changing.
    probabilities = torch.sigmoid(outputs.logits)
    # Row 0 is the only example in the batch; column 1 is treated as the
    # "depressed" class.
    depression_prob = probabilities[0, 1].item()

    if depression_prob > 0.5:
        return f"Depressed (Confidence: {depression_prob:.2%})"
    return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"

# Speech input -> depression classification
def speech_to_depression(audio_file):
    """Transcribe a recording and classify the transcription.

    This is the callback wired into the Gradio interface, which declares two
    output textboxes (transcription, detection result). Gradio expects one
    return value per output component, so the results are returned as a
    tuple in that order rather than as a dict with string keys.

    Args:
        audio_file: Path of the recorded audio file, or a falsy value
            (e.g. None) when nothing was recorded before submitting.

    Returns:
        A ``(transcription, result)`` tuple of strings.
    """
    # Without a recording there is nothing to transcribe; report that instead
    # of letting the STT model fail on a missing path.
    if not audio_file:
        return "", "No audio was provided."

    text = speech_to_text(audio_file)
    result = predict_depression(text)
    return text, result

# Gradio interface
# Input component: a microphone recording, passed to the callback as a file
# path (type="filepath").
# NOTE(review): the `source=` keyword is Gradio-version specific -- confirm it
# matches the installed Gradio release.
_audio_input = gr.Audio(source="microphone", type="filepath", label="Speak here")

# Output components: one textbox for the transcription and one for the
# classification result, in that order.
_output_boxes = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Depression Detection Result"),
]

interface = gr.Interface(
    fn=speech_to_depression,
    inputs=_audio_input,
    outputs=_output_boxes,
    title="Speech-to-Depression Detection",
    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
)

# Start the web UI as soon as the script runs.
interface.launch()