# Hugging Face file-view header (page residue, preserved as comments):
# ireneminhee's picture
# Upload 4 files
# a36df50 verified
# raw
# history blame
# 1.9 kB
# -*- coding: utf-8 -*-
"""app.py
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
"""
import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import nemo.collections.asr as nemo_asr
# Load the speech-to-text (STT) model.
stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="SungBeom/stt_kr_conformer_ctc_medium",
)

# Load the depression classifier from the local checkpoint directory and the
# tokenizer from the "klue/bert-base" repository.
model_path = "./model"
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# STT conversion function
def speech_to_text(audio_file):
    """Transcribe a single audio file with the module-level STT model.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The first (and only) transcription produced for ``audio_file``.
    """
    # Pass the one-element file list positionally instead of by keyword so the
    # call does not depend on the parameter's name.
    # NOTE(review): newer NeMo releases reportedly rename `paths2audio_files`
    # to `audio` -- confirm against the pinned NeMo version.
    first_result = stt_model.transcribe([audio_file])[0]
    # NOTE(review): some NeMo versions may return hypothesis objects rather
    # than plain strings; unwrap `.text` when present, otherwise return as-is.
    return getattr(first_result, "text", first_result)
# Depression classification function
def predict_depression(text):
    """Classify ``text`` and return a human-readable verdict string.

    The text is tokenized with the module-level ``tokenizer`` and scored by
    the module-level ``model``; the score at label index 1 is used as the
    depression probability.

    Args:
        text: The text to classify.

    Returns:
        ``"Depressed (Confidence: ...)"`` when the depression probability is
        above 0.5, otherwise ``"Not Depressed (Confidence: ...)"`` with the
        complementary probability.
    """
    # Truncate long transcriptions so the sequence cannot exceed the encoder's
    # position-embedding limit (falls back to 512 if the config lacks it).
    max_length = getattr(model.config, "max_position_embeddings", 512)
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits

    # For a single-label head the class scores must be normalised jointly
    # (softmax) so that `1 - p` really is the other class's probability;
    # independent sigmoids are only appropriate for a multi-label head.
    # NOTE(review): assumes the checkpoint is single-label unless its config
    # says otherwise -- confirm against how ./model was trained.
    if getattr(model.config, "problem_type", None) == "multi_label_classification":
        probabilities = torch.sigmoid(logits)
    else:
        probabilities = torch.softmax(logits, dim=-1)

    depression_prob = probabilities[0, 1].item()
    if depression_prob > 0.5:
        return f"Depressed (Confidence: {depression_prob:.2%})"
    return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"
# μŒμ„± μž…λ ₯ -> 우울증 νŒλ³„ ν•¨μˆ˜
def speech_to_depression(audio_file):
text = speech_to_text(audio_file)
result = predict_depression(text)
return {"Transcription": text, "Depression Result": result}
# Gradio interface
def _interface_outputs(audio_file):
    """Return ``(transcription, verdict)`` for the two output textboxes.

    ``speech_to_depression`` returns a string-keyed dict, but Gradio fills
    multiple output components from a tuple of values, so unpack it here.
    """
    outcome = speech_to_depression(audio_file)
    return outcome["Transcription"], outcome["Depression Result"]


def _microphone_input():
    """Build the microphone audio input, tolerating both Gradio spellings."""
    try:
        # Original spelling: a single ``source`` string.
        return gr.Audio(source="microphone", type="filepath", label="Speak here")
    except TypeError:
        # NOTE(review): newer Gradio releases replace ``source`` with a
        # ``sources`` list -- confirm against the pinned Gradio version.
        return gr.Audio(sources=["microphone"], type="filepath", label="Speak here")


interface = gr.Interface(
    fn=_interface_outputs,
    inputs=_microphone_input(),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Depression Detection Result"),
    ],
    title="Speech-to-Depression Detection",
    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    interface.launch()