Rename minor_project_3.py to utils.py

Files changed:
- minor_project_3.py  +0 -402
- utils.py  +85 -0
minor_project_3.py
DELETED
@@ -1,402 +0,0 @@
# -*- coding: utf-8 -*-
"""Minor_project_3
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1rvtDpt05RT7jCrMVUg-UyLcUjcnhI9TO
# Importing libraries
"""

!pip install nlp
!pip install datasets
import tensorflow as tf
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nlp
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Bidirectional
from keras.callbacks import EarlyStopping
from keras.layers import GlobalAvgPool1D
import random

import os

!pip install -U datasets

"""# Importing the dataset
"""

# Importing the dataset
from datasets import load_dataset

data = load_dataset("emotion", download_mode="force_redownload")

# Converting the train, validation and test datasets into DataFrame format
train = pd.DataFrame(data['train'])
validation = pd.DataFrame(data['validation'])
test = pd.DataFrame(data['test'])

"""We will be detecting five emotions which are: sadness, anger, love, surprise, fear, joy using DNN
> Indented block
"""

# Train dataset
train.head(10)

train['label'].unique()

# emotion_mapping = {
#     0: 'sadness',
#     1: 'joy',
#     2: 'love',
#     3: 'anger',
#     4: 'fear',
#     5: 'surprise'
# }

# # Replace numeric labels with textual emotions
# train['label'] = train['label'].map(emotion_mapping).fillna('Unknown')
# train.head()

# train['label'] = {0: "sadness",1: "joy", 2:"love",3:"anger",4:"fear",5:"surprise"}
# train.head()

train['length_of_text'] = [len(i.split(' ')) for i in train['text']]

fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"})

fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"},
                   color=train['label'])
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts by Emotions',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

fig = px.histogram(train, x='label', color='label')
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Labels',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

nltk.download('punkt')
nltk.download('stopwords')

"""# Tokenization"""

def tokenization(inputs):
    return word_tokenize(inputs)  # REFERENCE[1]


train['text_tokenized'] = train['text'].apply(tokenization)
validation['text_tokenized'] = validation['text'].apply(tokenization)

train.head()

"""# Stopword removal"""

stop_words = set(stopwords.words('english'))

def stopwords_remove(inputs):
    return [item for item in inputs if item not in stop_words]

train['text_stop'] = train['text_tokenized'].apply(stopwords_remove)
validation['text_stop'] = validation['text_tokenized'].apply(stopwords_remove)

train.head()

"""# Lemmatization"""

nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def lemmatization(inputs):
    return [lemmatizer.lemmatize(word=x, pos='v') for x in inputs]

train['text_lemmatized'] = train['text_stop'].apply(lemmatization)
validation['text_lemmatized'] = validation['text_stop'].apply(lemmatization)

train.head()

train['text_cleaned'] = train['text_lemmatized'].str.join(' ')
validation['text_cleaned'] = validation['text_lemmatized'].str.join(' ')

train.head()  # Final form of the dataset

WordCloud = WordCloud(max_words=100,
                      random_state=30,
                      collocations=True).generate(str((train['text_cleaned'])))

plt.figure(figsize=(15, 8))
plt.imshow(WordCloud, interpolation='bilinear')
plt.axis("off")
plt.show()

num_words = 10000
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train['text_cleaned'])

word_index = tokenizer.word_index
# print(word_index)

Tokenized_train = tokenizer.texts_to_sequences(train['text_cleaned'])
Tokenized_val = tokenizer.texts_to_sequences(validation['text_cleaned'])

print('Non-tokenized Version: ', train['text_cleaned'][0])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('--'*50)
print('Non-tokenized Version: ', train['text_cleaned'][10])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('--'*50)
print('Non-tokenized Version: ', train['text'][100])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][100]]))

maxlen = 40
Padded_train = pad_sequences(Tokenized_train, maxlen=maxlen, padding='pre')
Padded_val = pad_sequences(Tokenized_val, maxlen=maxlen, padding='pre')

print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('Padded Version: ', Padded_train[0])
print('--'*50)
print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('Padded Version: ', Padded_train[10])

"""# Model building"""

lstm_model = Sequential()

lstm_model.add(Embedding(num_words, 16, input_length=maxlen))
lstm_model.add(GlobalAvgPool1D())

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, activation='relu'))
lstm_model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu', return_sequences=True))
lstm_model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu'))
lstm_model.add(Dropout(0.3))

lstm_model.add(Dense(6, activation='softmax'))

lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

lstm_model.summary()

train['label'] = train['label']
validation['label'] = validation['label']
train.head()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='auto', patience=5,
                                                  restore_best_weights=True)

epochs = 100
hist = lstm_model.fit(Padded_train, train['label'], epochs=epochs,
                      validation_data=(Padded_val, validation['label']),
                      callbacks=[early_stopping])

plt.figure(figsize=(15, 8))
plt.plot(hist.history['loss'], label='Train Loss')
plt.plot(hist.history['val_loss'], label='Validation Loss')
plt.title('Train and Validation Loss Graphs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

test['text_tokenized'] = test['text'].apply(tokenization)
test['text_stop'] = test['text_tokenized'].apply(stopwords_remove)
test['text_lemmatized'] = test['text_stop'].apply(lemmatization)
test['text_cleaned'] = test['text_lemmatized'].str.join(' ')

Tokenized_test = tokenizer.texts_to_sequences(test['text_cleaned'])
Padded_test = pad_sequences(Tokenized_test, maxlen=maxlen, padding='pre')

test['label'] = test['label']

test_evaluate = lstm_model.evaluate(Padded_test, test['label'])

test.head()

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import pad_sequences

# Assuming you have imported necessary libraries, defined Tokenized_test, Padded_test, and test dataframe

# Make predictions on test data
predictions = lstm_model.predict(Padded_test)
predicted_labels = np.argmax(predictions, axis=1)  # Assuming the output is categorical

# Compute confusion matrix
cm = confusion_matrix(test['label'], predicted_labels)

# Plot confusion matrix as a heatmap
def plot_confusion_matrix(conf_matrix, class_names):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Define class names (labels)
class_names = np.unique(test['label'])

# Plot confusion matrix
plot_confusion_matrix(cm, class_names)

"""# Making predictions"""

def make_predictions(text_input):
    text_input = str(text_input)
    text_input = tokenization(text_input)
    text_input = stopwords_remove(text_input)
    text_input = lemmatization(text_input)
    text_input = ' '.join(text_input)
    text_input = tokenizer.texts_to_sequences([text_input])
    text_input = pad_sequences(text_input, maxlen=maxlen, padding='pre')
    text_input = np.argmax(lstm_model.predict(text_input))
    if text_input == 0:
        return 'Predicted Emotion: Sadness'
    elif text_input == 1:
        return 'Predicted Emotion: Joy'
    elif text_input == 2:
        return 'Predicted Emotion: Love'
    elif text_input == 3:
        return 'Predicted Emotion: Anger'
    elif text_input == 4:
        return 'Predicted Emotion: Fear'
    else:
        return 'Predicted Emotion: Surprise'
    return text_input

# label_ = {0: "Sadness", 1: "Joy", 2: "Love", 3: "Anger", 4: "Fear", 5: "Surprise"}
# test['label'] = test['label'].replace(label_)

# # Randomly chosen Test Dataset data points
# i = random.randint(0, len(test) - 1)

# print('Test Text:', test['text'][i])
# print(' ')
# print('Actual Emotion:', test['label'][i])
# make_predictions(test['text'][i])
# print('-'*50)
# print('Test Text:', test['text'][i+1])
# print(' ')
# print('Actual Emotion:', test['label'][i+1])
# make_predictions(test['text'][i+1])

ans = make_predictions('No one told you when to run, you missed the starting gun')
ans

ans2 = make_predictions("I just asked one question to confirm his request, and my boss bit my head off.")
ans2

ans3 = make_predictions("She’s flying high after the successful product launch.")
ans3

ans4 = make_predictions("I’m going to have the first meeting with a big client tomorrow, and I’m feeling butterflies in my stomach")
ans4

ans5 = make_predictions("Sometimes the people who appear to be the most confident are actually afraid of their own shadows.")
ans5

ans6 = make_predictions("I'm really impressed that Ashley can speak 7 languages, whereas I only speak one!")
ans6

ans7 = make_predictions("We are delighted that you will be coming to visit us. It will be so nice to have you here.")
ans7

ans8 = make_predictions("She’s flying high after the successful product launch.")
ans8

ans9 = make_predictions("I am anxious to hear back about the job interview I had on Friday. I hope I get the job!")
ans9

"""# Automatic speech recognition using Whisper"""

!pip install h5py
!pip install typing-extensions==4.9.0
!pip install cohere
!pip install wheel
!pip install openai

!pip install git+https://github.com/openai/whisper.git -q

import whisper

model = whisper.load_model("base")

model.device

import IPython
from IPython.display import Audio

import pickle

# save the iris classification model as a pickle file
model_pkl_file = "VED.pkl"

with open(model_pkl_file, 'wb') as file:
    pickle.dump(lstm_model, file)

"""# UI building using Gradio"""

!pip install jiwer
!pip install gradio==3.50 typing-extensions

!pip install fastapi==0.103.2
import gradio as gr
import time

def transcribe(audio_file):
    model = whisper.load_model("base")
    result = model.transcribe(audio_file)
    result_string = result['text']
    detected_emotion = make_predictions(result_string)
    # tt = result['text']
    # review=[]
    # review.append(tt)
    # sent = predict(str(result))

    # # result_string = str(strg)
    # Sentiment = predict_sentiment(strg)
    # return Sentiment
    return detected_emotion

gr.Interface(
    title='Audio based Sentiment analysis',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        "text"
    ],
    live=True, debug=True).launch(debug=True)
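Note on the model-building cell above: the three tf.keras.layers.Bidirectional(...) calls construct layers but never attach them to lstm_model, so only the Embedding, GlobalAvgPool1D, Dropout, and Dense layers are actually trained. Below is a minimal sketch of how the stacked bidirectional LSTMs could be wired in instead; the architecture is an assumption about the intent, not something this commit defines.

# Sketch only: one plausible wiring of the bidirectional LSTM stack.
# num_words and maxlen are taken from the notebook cells above.
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense

sketch_model = Sequential()
sketch_model.add(Embedding(num_words, 16, input_length=maxlen))
sketch_model.add(Bidirectional(LSTM(50, return_sequences=True)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Bidirectional(LSTM(40, return_sequences=True)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Bidirectional(LSTM(40)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Dense(6, activation='softmax'))
sketch_model.compile(loss='sparse_categorical_crossentropy',
                     optimizer='adam', metrics=['accuracy'])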
utils.py
ADDED
@@ -0,0 +1,85 @@
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split

# all emotions on RAVDESS dataset
int2emotion = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# we allow only these emotions
AVAILABLE_EMOTIONS = {
    "angry",
    "sad",
    "neutral",
    "happy"
}

def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result


def load_data(test_size=0.2):
    X, y = [], []
    for file in glob.glob("data/Actor_*/*.wav"):
        # get the base name of the audio file
        basename = os.path.basename(file)
        # get the emotion label
        emotion = int2emotion[basename.split("-")[2]]
        # we allow only AVAILABLE_EMOTIONS we set
        if emotion not in AVAILABLE_EMOTIONS:
            continue
        # extract speech features
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        # add to data
        X.append(features)
        y.append(emotion)
    # split the data to training and testing and return it
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)
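As a usage sketch for the new utils.py: assuming the RAVDESS recordings are unpacked under data/Actor_*/ as load_data expects and scikit-learn is installed, the helpers could feed a simple classifier. The MLPClassifier and the file name "some_recording.wav" below are illustrative assumptions, not part of this Space.

# Usage sketch: feed the features from load_data() into a small scikit-learn model.
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

from utils import load_data, extract_feature

# load_data() returns the usual train_test_split tuple
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

clf = MLPClassifier(hidden_layer_sizes=(300,), max_iter=500)
clf.fit(X_train, y_train)
print("test accuracy:", accuracy_score(y_test, clf.predict(X_test)))

# Single-file prediction, mirroring the docstring example in extract_feature
features = extract_feature("some_recording.wav", mfcc=True, chroma=True, mel=True)
print("predicted emotion:", clf.predict(features.reshape(1, -1))[0])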