Guhanselvam committed
Commit 7ee8909 · verified · 1 Parent(s): 3c17b58

Rename minor_project_3.py to utils.py

Files changed (2)
  1. minor_project_3.py +0 -402
  2. utils.py +85 -0
minor_project_3.py DELETED
@@ -1,402 +0,0 @@
- # -*- coding: utf-8 -*-
- """Minor_project_3
- Automatically generated by Colaboratory.
- Original file is located at
- https://colab.research.google.com/drive/1rvtDpt05RT7jCrMVUg-UyLcUjcnhI9TO
- # Importing libraries
- """
-
- !pip install nlp
- !pip install datasets
- import tensorflow as tf
- import numpy as np
- import pandas as pd
- from wordcloud import WordCloud
- import seaborn as sns
- import matplotlib.pyplot as plt
- import plotly.express as px
- import plotly.graph_objects as go
- import nltk
- from nltk.tokenize import word_tokenize
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- import nlp
- from tensorflow.keras.preprocessing.text import Tokenizer
- from tensorflow.keras.preprocessing.sequence import pad_sequences
- from keras.layers import Dense, Dropout
- from keras.layers import LSTM
- from keras.models import Sequential
- from keras.layers import Embedding
- from keras.layers import Flatten
- from keras.layers import Bidirectional
- from keras.callbacks import EarlyStopping
- from keras.layers import GlobalAvgPool1D
- import random
-
- import os
-
- !pip install -U datasets
-
- """# Importing the dataset
- """
-
- # Importing the dataset
- from datasets import load_dataset
-
- data = load_dataset("emotion", download_mode="force_redownload")
-
- # Converting the train, validation and test datasets into DataFrame format
- train = pd.DataFrame(data['train'])
- validation = pd.DataFrame(data['validation'])
- test = pd.DataFrame(data['test'])
-
- """We will be detecting six emotions: sadness, joy, love, anger, fear, and surprise, using a DNN."""
-
- # Train dataset
- train.head(10)
-
- train['label'].unique()
-
- # emotion_mapping = {
- #     0: 'sadness',
- #     1: 'joy',
- #     2: 'love',
- #     3: 'anger',
- #     4: 'fear',
- #     5: 'surprise'
- # }
-
- # # Replace numeric labels with textual emotions
- # train['label'] = train['label'].map(emotion_mapping).fillna('Unknown')
- # train.head()
-
- # train['label'] = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}
- # train.head()
-
- train['length_of_text'] = [len(i.split(' ')) for i in train['text']]
-
- fig = px.histogram(train['length_of_text'], marginal='box',
-                    labels={"value": "Length of the Text"})
-
- fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
- fig.update_layout(title_text='Distribution of the Length of the Texts',
-                   title_x=0.5, title_font=dict(size=22))
- fig.show()
-
- fig = px.histogram(train['length_of_text'], marginal='box',
-                    labels={"value": "Length of the Text"},
-                    color=train['label'])
- fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
- fig.update_layout(title_text='Distribution of the Length of the Texts by Emotions',
-                   title_x=0.5, title_font=dict(size=22))
- fig.show()
-
- fig = px.histogram(train, x='label', color='label')
- fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
- fig.update_layout(title_text='Distribution of the Labels',
-                   title_x=0.5, title_font=dict(size=22))
- fig.show()
-
- nltk.download('punkt')
- nltk.download('stopwords')
-
- """# Tokenization"""
-
- def tokenization(inputs):
-     return word_tokenize(inputs)  # REFERENCE[1]
-
-
- train['text_tokenized'] = train['text'].apply(tokenization)
- validation['text_tokenized'] = validation['text'].apply(tokenization)
-
- train.head()
-
- """# Stopword removal"""
-
- stop_words = set(stopwords.words('english'))
-
- def stopwords_remove(inputs):
-     return [item for item in inputs if item not in stop_words]
-
- train['text_stop'] = train['text_tokenized'].apply(stopwords_remove)
- validation['text_stop'] = validation['text_tokenized'].apply(stopwords_remove)
-
- train.head()
-
- """# Lemmatization"""
-
- nltk.download('wordnet')
-
- lemmatizer = WordNetLemmatizer()
-
- def lemmatization(inputs):
-     return [lemmatizer.lemmatize(word=x, pos='v') for x in inputs]
-
- train['text_lemmatized'] = train['text_stop'].apply(lemmatization)
- validation['text_lemmatized'] = validation['text_stop'].apply(lemmatization)
-
- train.head()
-
- train['text_cleaned'] = train['text_lemmatized'].str.join(' ')
- validation['text_cleaned'] = validation['text_lemmatized'].str.join(' ')
-
- train.head()  # Final form of the dataset
-
- # Word cloud of the cleaned training text
- # (lowercase variable name so the WordCloud class is not shadowed;
- # join the texts instead of relying on the Series string repr)
- word_cloud = WordCloud(max_words=100,
-                        random_state=30,
-                        collocations=True).generate(' '.join(train['text_cleaned']))
-
- plt.figure(figsize=(15, 8))
- plt.imshow(word_cloud, interpolation='bilinear')
- plt.axis("off")
- plt.show()
-
- num_words = 10000
- tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
- tokenizer.fit_on_texts(train['text_cleaned'])
-
- word_index = tokenizer.word_index
- # print(word_index)
-
- Tokenized_train = tokenizer.texts_to_sequences(train['text_cleaned'])
- Tokenized_val = tokenizer.texts_to_sequences(validation['text_cleaned'])
-
- print('Non-tokenized Version: ', train['text_cleaned'][0])
- print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
- print('--' * 50)
- print('Non-tokenized Version: ', train['text_cleaned'][10])
- print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
- print('--' * 50)
- print('Non-tokenized Version: ', train['text'][100])
- print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][100]]))
-
- maxlen = 40
- Padded_train = pad_sequences(Tokenized_train, maxlen=maxlen, padding='pre')
- Padded_val = pad_sequences(Tokenized_val, maxlen=maxlen, padding='pre')
-
- print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
- print('Padded Version: ', Padded_train[0])
- print('--' * 50)
- print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
- print('Padded Version: ', Padded_train[10])
-
- """# Model building"""
-
- lstm_model = Sequential()
-
- lstm_model.add(Embedding(num_words, 16, input_length=maxlen))
- lstm_model.add(GlobalAvgPool1D())
-
- # NOTE: the three Bidirectional LSTM layers below are constructed but never
- # passed to lstm_model.add(...), so they have no effect; the network actually
- # trained is Embedding -> GlobalAvgPool1D -> Dropout (x3) -> Dense(6, softmax).
- tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, activation='relu'))
- lstm_model.add(Dropout(0.3))
-
- tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu', return_sequences=True))
- lstm_model.add(Dropout(0.3))
-
- tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu'))
- lstm_model.add(Dropout(0.3))
-
- lstm_model.add(Dense(6, activation='softmax'))
-
- lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
-
- lstm_model.summary()
-
- train['label'] = train['label']
- validation['label'] = validation['label']
- train.head()
-
- early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='auto', patience=5,
-                                                   restore_best_weights=True)
-
- epochs = 100
- hist = lstm_model.fit(Padded_train, train['label'], epochs=epochs,
-                       validation_data=(Padded_val, validation['label']),
-                       callbacks=[early_stopping])
-
- plt.figure(figsize=(15, 8))
- plt.plot(hist.history['loss'], label='Train Loss')
- plt.plot(hist.history['val_loss'], label='Validation Loss')
- plt.title('Train and Validation Loss Graphs')
- plt.xlabel('Epochs')
- plt.ylabel('Loss')
- plt.legend()
-
- test['text_tokenized'] = test['text'].apply(tokenization)
- test['text_stop'] = test['text_tokenized'].apply(stopwords_remove)
- test['text_lemmatized'] = test['text_stop'].apply(lemmatization)
- test['text_cleaned'] = test['text_lemmatized'].str.join(' ')
-
- Tokenized_test = tokenizer.texts_to_sequences(test['text_cleaned'])
- Padded_test = pad_sequences(Tokenized_test, maxlen=maxlen, padding='pre')
-
- test['label'] = test['label']
-
- test_evaluate = lstm_model.evaluate(Padded_test, test['label'])
-
- test.head()
-
- import pandas as pd
- import numpy as np
- from sklearn.metrics import confusion_matrix
- import seaborn as sns
- import matplotlib.pyplot as plt
- from keras.preprocessing.sequence import pad_sequences
-
- # Assuming you have imported necessary libraries, defined Tokenized_test, Padded_test, and test dataframe
-
- # Make predictions on test data
- predictions = lstm_model.predict(Padded_test)
- predicted_labels = np.argmax(predictions, axis=1)  # Assuming the output is categorical
-
- # Compute confusion matrix
- cm = confusion_matrix(test['label'], predicted_labels)
-
- # Plot confusion matrix as a heatmap
- def plot_confusion_matrix(conf_matrix, class_names):
-     plt.figure(figsize=(8, 6))
-     sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=class_names, yticklabels=class_names)
-     plt.xlabel('Predicted Labels')
-     plt.ylabel('True Labels')
-     plt.title('Confusion Matrix')
-     plt.show()
-
- # Define class names (labels)
- class_names = np.unique(test['label'])
-
- # Plot confusion matrix
- plot_confusion_matrix(cm, class_names)
-
- """# Making predictions"""
-
- def make_predictions(text_input):
-     text_input = str(text_input)
-     text_input = tokenization(text_input)
-     text_input = stopwords_remove(text_input)
-     text_input = lemmatization(text_input)
-     text_input = ' '.join(text_input)
-     text_input = tokenizer.texts_to_sequences([text_input])
-     text_input = pad_sequences(text_input, maxlen=maxlen, padding='pre')
-     text_input = np.argmax(lstm_model.predict(text_input))
-     if text_input == 0:
-         return 'Predicted Emotion: Sadness'
-     elif text_input == 1:
-         return 'Predicted Emotion: Joy'
-     elif text_input == 2:
-         return 'Predicted Emotion: Love'
-     elif text_input == 3:
-         return 'Predicted Emotion: Anger'
-     elif text_input == 4:
-         return 'Predicted Emotion: Fear'
-     else:
-         return 'Predicted Emotion: Surprise'
-     return text_input
-
- # label_ = {0: "Sadness", 1: "Joy", 2: "Love", 3: "Anger", 4: "Fear", 5: "Surprise"}
- # test['label'] = test['label'].replace(label_)
-
- # # Randomly chosen Test Dataset data points
- # i = random.randint(0, len(test) - 1)
-
- # print('Test Text:', test['text'][i])
- # print(' ')
- # print('Actual Emotion:', test['label'][i])
- # make_predictions(test['text'][i])
- # print('-' * 50)
- # print('Test Text:', test['text'][i+1])
- # print(' ')
- # print('Actual Emotion:', test['label'][i+1])
- # make_predictions(test['text'][i+1])
-
- ans = make_predictions('No one told you when to run, you missed the starting gun')
- ans
-
- ans2 = make_predictions("I just asked one question to confirm his request, and my boss bit my head off.")
- ans2
-
- ans3 = make_predictions("She’s flying high after the successful product launch.")
- ans3
-
- ans4 = make_predictions("I’m going to have the first meeting with a big client tomorrow, and I’m feeling butterflies in my stomach")
- ans4
-
- ans5 = make_predictions("Sometimes the people who appear to be the most confident are actually afraid of their own shadows.")
- ans5
-
- ans6 = make_predictions("I'm really impressed that Ashley can speak 7 languages, whereas I only speak one!")
- ans6
-
- ans7 = make_predictions("We are delighted that you will be coming to visit us. It will be so nice to have you here.")
- ans7
-
- ans8 = make_predictions("She’s flying high after the successful product launch.")
- ans8
-
- ans9 = make_predictions("I am anxious to hear back about the job interview I had on Friday. I hope I get the job!")
- ans9
-
- """# Automatic speech recognition using Whisper"""
-
- !pip install h5py
- !pip install typing-extensions==4.9.0
- !pip install cohere
- !pip install wheel
- !pip install openai
-
- !pip install git+https://github.com/openai/whisper.git -q
-
- import whisper
-
- model = whisper.load_model("base")
-
- model.device
-
- import IPython
- from IPython.display import Audio
-
- import pickle
-
- # save the emotion classification model as a pickle file
- model_pkl_file = "VED.pkl"
-
- with open(model_pkl_file, 'wb') as file:
-     pickle.dump(lstm_model, file)
-
- """# UI building using Gradio"""
-
- !pip install jiwer
- !pip install gradio==3.50 typing-extensions
-
- !pip install fastapi==0.103.2
- import gradio as gr
- import time
-
- def transcribe(audio_file):
-     model = whisper.load_model("base")
-     result = model.transcribe(audio_file)
-     result_string = result['text']
-     detected_emotion = make_predictions(result_string)
-     # tt = result['text']
-     # review = []
-     # review.append(tt)
-     # sent = predict(str(result))
-
-     # # result_string = str(strg)
-     # Sentiment = predict_sentiment(strg)
-     # return Sentiment
-     return detected_emotion
-
- gr.Interface(
-     title='Audio based Sentiment analysis',
-     fn=transcribe,
-     inputs=[
-         gr.inputs.Audio(source="microphone", type="filepath")
-     ],
-     outputs=[
-         "text"
-     ],
-     live=True, debug=True).launch(debug=True)
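A note on the pickling step in the deleted script above: pickle.dump(lstm_model, file) writes the trained Keras model to VED.pkl. The short sketch below shows how such a file might be reloaded for inference; it is illustrative only, since whether pickle round-trips a Keras model depends on the TensorFlow version, and the officially supported route is lstm_model.save(...) plus tf.keras.models.load_model(...).

import pickle

# reload the model pickled as "VED.pkl" above (illustrative; model.save() /
# tf.keras.models.load_model() is the supported alternative for Keras models)
with open("VED.pkl", "rb") as f:
    restored_model = pickle.load(f)

# the restored model can then be used like lstm_model, e.g.
# restored_model.predict(Padded_test)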
utils.py ADDED
@@ -0,0 +1,85 @@
+ import soundfile
+ import numpy as np
+ import librosa
+ import glob
+ import os
+ from sklearn.model_selection import train_test_split
+
+ # all emotions on RAVDESS dataset
+ int2emotion = {
+     "01": "neutral",
+     "02": "calm",
+     "03": "happy",
+     "04": "sad",
+     "05": "angry",
+     "06": "fearful",
+     "07": "disgust",
+     "08": "surprised"
+ }
+
+ # we allow only these emotions
+ AVAILABLE_EMOTIONS = {
+     "angry",
+     "sad",
+     "neutral",
+     "happy"
+ }
+
+ def extract_feature(file_name, **kwargs):
+     """
+     Extract feature from audio file `file_name`
+     Features supported:
+         - MFCC (mfcc)
+         - Chroma (chroma)
+         - MEL Spectrogram Frequency (mel)
+         - Contrast (contrast)
+         - Tonnetz (tonnetz)
+     e.g.:
+     `features = extract_feature(path, mel=True, mfcc=True)`
+     """
+     mfcc = kwargs.get("mfcc")
+     chroma = kwargs.get("chroma")
+     mel = kwargs.get("mel")
+     contrast = kwargs.get("contrast")
+     tonnetz = kwargs.get("tonnetz")
+     with soundfile.SoundFile(file_name) as sound_file:
+         X = sound_file.read(dtype="float32")
+         sample_rate = sound_file.samplerate
+         if chroma or contrast:
+             stft = np.abs(librosa.stft(X))
+         result = np.array([])
+         if mfcc:
+             mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
+             result = np.hstack((result, mfccs))
+         if chroma:
+             chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
+             result = np.hstack((result, chroma))
+         if mel:
+             # pass the audio as the y= keyword (newer librosa removed the positional form)
+             mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
+             result = np.hstack((result, mel))
+         if contrast:
+             contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
+             result = np.hstack((result, contrast))
+         if tonnetz:
+             tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
+             result = np.hstack((result, tonnetz))
+     return result
+
+
+ def load_data(test_size=0.2):
+     X, y = [], []
+     for file in glob.glob("data/Actor_*/*.wav"):
+         # get the base name of the audio file
+         basename = os.path.basename(file)
+         # get the emotion label
+         emotion = int2emotion[basename.split("-")[2]]
+         # we allow only AVAILABLE_EMOTIONS we set
+         if emotion not in AVAILABLE_EMOTIONS:
+             continue
+         # extract speech features
+         features = extract_feature(file, mfcc=True, chroma=True, mel=True)
+         # add to data
+         X.append(features)
+         y.append(emotion)
+     # split the data to training and testing and return it
+     return train_test_split(np.array(X), y, test_size=test_size, random_state=7)
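
For context, a minimal usage sketch for the new utils.py follows. It assumes the RAVDESS .wav files are extracted under data/Actor_*/ as load_data() expects, and it uses scikit-learn's MLPClassifier purely as a placeholder model, not necessarily the classifier this repository trains.

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

from utils import load_data

# extract MFCC/chroma/mel features and split into train/test sets
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

# fit a small feed-forward classifier on the extracted features (placeholder choice)
model = MLPClassifier(hidden_layer_sizes=(300,), max_iter=500, alpha=0.01)
model.fit(X_train, y_train)

# report accuracy on the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))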