Spaces:

codetocare
/

emotion_api

Runtime error

codetocare committed on Oct 22

Commit

16dbaa3

verified ·

1 Parent(s): 0ae8940

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,21 +1,41 @@
 model_name = "Dpngtm/wav2vec2-emotion-recognition"
-processor  = Wav2Vec2Processor.from_pretrained(model_name)
-model      = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
 labels = ["angry", "calm", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]
 def predict_emotion(audio):
     speech, sr = audio
     if sr != 16000:
         resampler = torchaudio.transforms.Resample(sr, 16000)
-        speech    = resampler(torch.tensor(speech))
     else:
-        speech    = torch.tensor(speech)
     input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
     with torch.no_grad():
         logits = model(input_values).logits
     predicted_id = torch.argmax(logits, dim=-1).item()
-    emotion      = labels[predicted_id]
     return f"Predicted Emotion: **{emotion}**"

+import gradio as gr
+import torch
+import torchaudio
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
# Class names used to translate the classifier's predicted index into text
# (listed on the model card, per the original author's note).
labels = [
    "angry",
    "calm",
    "disgust",
    "fearful",
    "happy",
    "neutral",
    "sad",
    "surprised",
]

# Hugging Face Hub checkpoint; both the processor and the classifier are
# loaded from it once, at import time.
model_name = "Dpngtm/wav2vec2-emotion-recognition"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
def predict_emotion(audio):
    """Classify the emotion expressed in a recorded audio clip.

    Args:
        audio: The value Gradio supplies for ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. ``samples`` may be integer PCM and may carry a
            trailing channel axis.

    Returns:
        A Markdown string naming the predicted emotion, or a short prompt
        asking for audio when the input is missing or empty.
    """
    no_audio_message = "No audio received. Please record a clip and try again."
    if audio is None:
        return no_audio_message

    # Gradio's numpy audio format puts the sample rate FIRST, then the data.
    sr, speech = audio
    waveform = torch.as_tensor(speech)
    if waveform.numel() == 0:
        return no_audio_message

    # Integer PCM (e.g. int16 from a microphone) has to become float in
    # roughly [-1, 1]; the resampler and the model both work on floats.
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        full_scale = torch.iinfo(waveform.dtype).max
        waveform = waveform.to(torch.float32) / full_scale

    # Downmix to mono. Assumes multi-channel clips are laid out as
    # (samples, channels), which is how Gradio delivers them.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=-1)

    # The processor is called with sampling_rate=16000, so bring every
    # other rate to 16 kHz first.
    if sr != 16000:
        resampler = torchaudio.transforms.Resample(sr, 16000)
        waveform = resampler(waveform)

    # Hand the feature extractor a plain NumPy array, one of its documented
    # input types.
    input_values = processor(
        waveform.numpy(), sampling_rate=16000, return_tensors="pt"
    ).input_values
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_id = torch.argmax(logits, dim=-1).item()
    emotion = labels[predicted_id]
    return f"Predicted Emotion: **{emotion}**"
# Gradio interface
# Gradio 4 replaced the Audio ``source`` keyword with ``sources`` (a list) and
# rejects the old spelling with a TypeError. Try the legacy keyword first so
# Gradio 3 keeps its microphone input, then fall back to the current one.
try:
    audio_input = gr.Audio(
        source="microphone", type="numpy", label="Speak or Upload Audio"
    )
except TypeError:
    audio_input = gr.Audio(
        sources=["microphone"], type="numpy", label="Speak or Upload Audio"
    )

# Wire the classifier to a microphone input and a Markdown output.
interface = gr.Interface(
    fn=predict_emotion,
    inputs=audio_input,
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model.",
)
# Start the web server (this is the script's entry point on the Space).
interface.launch()