# -*- coding: utf-8 -*-
"""app.py

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
"""

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nemo.collections.asr as nemo_asr

# Load the speech-to-text (STT) model.
stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="SungBeom/stt_kr_conformer_ctc_medium"
)

# Load the depression classification model.
# NOTE(review): the tokenizer comes from "klue/bert-base" rather than from
# model_path — presumably the checkpoint was fine-tuned from that base model;
# confirm the vocabularies match.
model_path = "./model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")


# Speech-to-text conversion.
def speech_to_text(audio_file: str) -> str:
    """Transcribe a single audio file with the STT model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The first (and only) transcription returned by the STT model.
    """
    # NOTE(review): `paths2audio_files` is the keyword used by older NeMo
    # releases; verify against the installed NeMo version.
    transcription = stt_model.transcribe(paths2audio_files=[audio_file])[0]
    return transcription


# Depression classification.
def predict_depression(text: str) -> str:
    """Classify *text* and return a human-readable verdict with confidence.

    The code treats class index 1 of the classifier output as "depressed".

    Args:
        text: The text to classify.

    Returns:
        ``"Depressed (Confidence: ...)"`` when the probability of class 1
        exceeds 0.5, otherwise ``"Not Depressed (Confidence: ...)"``.
    """
    # Truncate so that long transcripts cannot exceed the encoder's maximum
    # sequence length.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits

    # A per-logit sigmoid is only meaningful for multi-label heads. For the
    # usual single-label head the classes are mutually exclusive, so the
    # probabilities must come from a softmax across the logits; otherwise the
    # 0.5 threshold ignores the logit of the other class.
    if getattr(model.config, "problem_type", None) == "multi_label_classification":
        probabilities = torch.sigmoid(logits)
    else:
        probabilities = torch.softmax(logits, dim=-1)

    depression_prob = probabilities[0, 1].item()
    if depression_prob > 0.5:
        return f"Depressed (Confidence: {depression_prob:.2%})"
    return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"


# Speech input -> depression classification.
def speech_to_depression(audio_file: str) -> tuple[str, str]:
    """Transcribe a recording and classify the resulting text.

    Args:
        audio_file: Path to the recorded audio, or ``None`` when the user
            submitted without recording anything.

    Returns:
        A ``(transcription, result)`` pair, one value per output component of
        the Gradio interface, in the order the components are declared.
    """
    if audio_file is None:
        return "", "No audio received. Please record something first."

    text = speech_to_text(audio_file)
    if not text:
        # Nothing was recognised; a score on empty text would be misleading.
        return "", "No speech detected."

    # Gradio maps multiple outputs positionally, so return a tuple rather
    # than a dict keyed by label.
    return text, predict_depression(text)


# Gradio interface.
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; newer Gradio
# releases use `sources=["microphone"]` — confirm against the pinned version.
interface = gr.Interface(
    fn=speech_to_depression,
    inputs=gr.Audio(source="microphone", type="filepath", label="Speak here"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Depression Detection Result"),
    ],
    title="Speech-to-Depression Detection",
    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
)

interface.launch()