codetocare committed · verified
Commit 16dbaa3 · 1 Parent(s): 0ae8940

Update app.py

Files changed (1):
app.py +25 -5
app.py CHANGED
@@ -1,21 +1,41 @@
+import gradio as gr
+import torch
+import torchaudio
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
+
+# Load model and processor from Hugging Face
 model_name = "Dpngtm/wav2vec2-emotion-recognition"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
+processor = Wav2Vec2Processor.from_pretrained(model_name)
+model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

+# Emotion labels from the model card
 labels = ["angry", "calm", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]

+# Emotion prediction function
 def predict_emotion(audio):
     speech, sr = audio
     if sr != 16000:
         resampler = torchaudio.transforms.Resample(sr, 16000)
-        speech = resampler(torch.tensor(speech))
+        speech = resampler(torch.tensor(speech))
     else:
-        speech = torch.tensor(speech)
+        speech = torch.tensor(speech)

     input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
+
     with torch.no_grad():
         logits = model(input_values).logits

     predicted_id = torch.argmax(logits, dim=-1).item()
-    emotion = labels[predicted_id]
+    emotion = labels[predicted_id]
     return f"Predicted Emotion: **{emotion}**"
+
+# Gradio interface
+interface = gr.Interface(
+    fn=predict_emotion,
+    inputs=gr.Audio(source="microphone", type="numpy", label="Speak or Upload Audio"),
+    outputs=gr.Markdown(label="Detected Emotion"),
+    title="Voice Emotion Recognition",
+    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model."
+)
+
+interface.launch()
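
With type="numpy", a Gradio audio component passes the callback a (sample_rate, data) tuple, so the unpacking "speech, sr = audio" above is reversed; the samples also usually arrive as 16-bit integers, and 2-D when stereo, which torchaudio.transforms.Resample rejects. Below is a minimal, more defensive sketch of the handler, reusing the processor, model, and labels defined in the diff; the name predict_emotion_safe and the normalization details are illustrative, not part of this commit.

import numpy as np
import torch
import torchaudio

def predict_emotion_safe(audio):
    # gr.Audio(type="numpy") supplies (sample_rate, data), not (data, sample_rate)
    sr, speech = audio
    speech = np.asarray(speech)
    # 16-bit PCM from the browser -> float32 in [-1.0, 1.0]
    if speech.dtype == np.int16:
        speech = speech.astype(np.float32) / 32768.0
    else:
        speech = speech.astype(np.float32)
    # Stereo arrives as (samples, channels); average down to mono
    if speech.ndim == 2:
        speech = speech.mean(axis=1)
    wav = torch.from_numpy(speech)
    # Resample requires floating-point input
    if sr != 16000:
        wav = torchaudio.transforms.Resample(sr, 16000)(wav)
    inputs = processor(wav.numpy(), sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    emotion = labels[torch.argmax(logits, dim=-1).item()]
    return f"Predicted Emotion: **{emotion}**"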
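
A quick sanity check outside the UI, calling the sketch above with one second of synthetic audio (a hypothetical 440 Hz tone; real speech is needed for a meaningful label):

tone = 0.1 * np.sin(2 * np.pi * 440 * np.linspace(0.0, 1.0, 44100, dtype=np.float32))
print(predict_emotion_safe((44100, tone)))  # prints something like "Predicted Emotion: **<label>**"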
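
One compatibility caveat: gr.Audio(source="microphone", ...) is the Gradio 3.x signature, and Gradio 4 replaced the source argument with a sources list, so on a newer runtime the interface construction as committed would raise a TypeError. A sketch of the Gradio 4-style equivalent, assuming the same predict_emotion callback:

interface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy", label="Speak or Upload Audio"),
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model."
)
interface.launch()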