Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""app.py
Automatically generated by Colab.
Original file is located at
    https://colab.research.google.com/drive/1eyNEXhQE4T_7cq-MsPQ77p7h6xdrOpzk
"""
import gradio as gr | |
import torch | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import nemo.collections.asr as nemo_asr | |
# Load the speech-to-text (STT) model.
stt_model = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="SungBeom/stt_kr_conformer_ctc_medium",
)

# Load the depression classification model from the local "./model"
# directory; its tokenizer is loaded from the "klue/bert-base" checkpoint.
model_path = "./model"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
# Speech-to-text conversion.
def speech_to_text(audio_file):
    """Transcribe a single audio file with the module-level STT model.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The first entry of the list returned by ``stt_model.transcribe``
        when called with this one file.
    """
    results = stt_model.transcribe(paths2audio_files=[audio_file])
    return results[0]
# Depression classification.
def predict_depression(text):
    """Classify ``text`` and return a human-readable verdict string.

    Args:
        text: The text to classify (for example a speech transcription).

    Returns:
        ``"Depressed (Confidence: ...)"`` when the score computed for class
        index 1 is greater than 0.5, otherwise
        ``"Not Depressed (Confidence: ...)"`` carrying the complementary
        score. Scores are rendered as percentages with two decimals.
    """
    # Truncate so that a long transcription cannot exceed the maximum
    # sequence length the tokenizer declares for the encoder.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: do not record an autograd graph for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): sigmoid scores the logit at index 1 independently of
    # index 0. If the checkpoint was trained as a standard two-class
    # (softmax / cross-entropy) classifier, softmax over the last dimension
    # would be the matching probability -- confirm against the training setup.
    depression_prob = torch.sigmoid(outputs.logits)[0, 1].item()

    if depression_prob > 0.5:
        return f"Depressed (Confidence: {depression_prob:.2%})"
    return f"Not Depressed (Confidence: {1 - depression_prob:.2%})"
# Speech input -> depression classification.
def speech_to_depression(audio_file):
    """Transcribe a recording and classify the resulting text.

    This is the callback wired into the Gradio interface, which declares
    two output components (transcription, detection result).

    Args:
        audio_file: Path of the recorded audio file, or ``None`` when the
            request was submitted without a recording.

    Returns:
        A ``(transcription, result)`` tuple: one value per output
        component, in the order the components are declared.
    """
    # Nothing was recorded: report it instead of failing inside the STT model.
    if audio_file is None:
        return "", "No audio received. Please record something and try again."

    text = speech_to_text(audio_file)
    result = predict_depression(text)
    # A multi-output Interface maps positional return values onto its output
    # components; a dict keyed by plain strings cannot be matched to them.
    return text, result
# Gradio interface: microphone recording in, transcription and verdict out.
# NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ spells it
# `sources=["microphone"]` -- confirm against the pinned Gradio version.
_audio_input = gr.Audio(source="microphone", type="filepath", label="Speak here")
_text_outputs = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Depression Detection Result"),
]

interface = gr.Interface(
    fn=speech_to_depression,
    inputs=_audio_input,
    outputs=_text_outputs,
    title="Speech-to-Depression Detection",
    description="Speak into the microphone. The app will transcribe your speech and detect depression likelihood.",
)

# Start the web app.
interface.launch()