Rename minor_project_3.py to utils.py

Files changed:
- minor_project_3.py  +0 -402
- utils.py  +85 -0
minor_project_3.py
DELETED
@@ -1,402 +0,0 @@
# -*- coding: utf-8 -*-
"""Minor_project_3
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1rvtDpt05RT7jCrMVUg-UyLcUjcnhI9TO
# Importing libraries
"""

!pip install nlp
!pip install datasets
import tensorflow as tf
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nlp
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import Flatten
from keras.layers import Bidirectional
from keras.callbacks import EarlyStopping
from keras.layers import GlobalAvgPool1D
import random

import os

!pip install -U datasets

"""# Importing the dataset
"""

# Importing the dataset
from datasets import load_dataset

data = load_dataset("emotion", download_mode="force_redownload")

# Converting the train, validation and test datasets into DataFrame format
train = pd.DataFrame(data['train'])
validation = pd.DataFrame(data['validation'])
test = pd.DataFrame(data['test'])

"""We will be detecting five emotions which are: sadness, anger, love, surprise, fear, joy using DNN
> Indented block
"""

# Train dataset
train.head(10)

train['label'].unique()

# emotion_mapping = {
#     0: 'sadness',
#     1: 'joy',
#     2: 'love',
#     3: 'anger',
#     4: 'fear',
#     5: 'surprise'
# }

# # Replace numeric labels with textual emotions
# train['label'] = train['label'].map(emotion_mapping).fillna('Unknown')
# train.head()

# train['label'] = {0: "sadness",1: "joy", 2:"love",3:"anger",4:"fear",5:"surprise"}
# train.head()

train['length_of_text'] = [len(i.split(' ')) for i in train['text']]

fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"})

fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

fig = px.histogram(train['length_of_text'], marginal='box',
                   labels={"value": "Length of the Text"},
                   color=train['label'])
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Length of the Texts by Emotions',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

fig = px.histogram(train, x='label', color='label')
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(title_text='Distribution of the Labels',
                  title_x=0.5, title_font=dict(size=22))
fig.show()

nltk.download('punkt')
nltk.download('stopwords')

"""# Tokenization"""

def tokenization(inputs):
    return word_tokenize(inputs)  # REFERENCE[1]


train['text_tokenized'] = train['text'].apply(tokenization)
validation['text_tokenized'] = validation['text'].apply(tokenization)

train.head()

"""# Stopword removal"""

stop_words = set(stopwords.words('english'))

def stopwords_remove(inputs):
    return [item for item in inputs if item not in stop_words]

train['text_stop'] = train['text_tokenized'].apply(stopwords_remove)
validation['text_stop'] = validation['text_tokenized'].apply(stopwords_remove)

train.head()

"""# Lemmatization"""

nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()

def lemmatization(inputs):
    return [lemmatizer.lemmatize(word=x, pos='v') for x in inputs]

train['text_lemmatized'] = train['text_stop'].apply(lemmatization)
validation['text_lemmatized'] = validation['text_stop'].apply(lemmatization)

train.head()

train['text_cleaned'] = train['text_lemmatized'].str.join(' ')
validation['text_cleaned'] = validation['text_lemmatized'].str.join(' ')

train.head()  # Final form of the dataset

WordCloud = WordCloud(max_words=100,
                      random_state=30,
                      collocations=True).generate(str((train['text_cleaned'])))

plt.figure(figsize=(15, 8))
plt.imshow(WordCloud, interpolation='bilinear')
plt.axis("off")
plt.show()

num_words = 10000
tokenizer = Tokenizer(num_words=num_words, oov_token='<OOV>')
tokenizer.fit_on_texts(train['text_cleaned'])

word_index = tokenizer.word_index
# print(word_index)

Tokenized_train = tokenizer.texts_to_sequences(train['text_cleaned'])
Tokenized_val = tokenizer.texts_to_sequences(validation['text_cleaned'])

print('Non-tokenized Version: ', train['text_cleaned'][0])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('--'*50)
print('Non-tokenized Version: ', train['text_cleaned'][10])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('--'*50)
print('Non-tokenized Version: ', train['text'][100])
print('Tokenized Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][100]]))

maxlen = 40
Padded_train = pad_sequences(Tokenized_train, maxlen=maxlen, padding='pre')
Padded_val = pad_sequences(Tokenized_val, maxlen=maxlen, padding='pre')

print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][0]]))
print('Padded Version: ', Padded_train[0])
print('--'*50)
print('Non-padded Version: ', tokenizer.texts_to_sequences([train['text_cleaned'][10]]))
print('Padded Version: ', Padded_train[10])

"""# Model building"""

lstm_model = Sequential()

lstm_model.add(Embedding(num_words, 16, input_length=maxlen))
lstm_model.add(GlobalAvgPool1D())

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, activation='relu'))
lstm_model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu', return_sequences=True))
lstm_model.add(Dropout(0.3))

tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(40, activation='relu'))
lstm_model.add(Dropout(0.3))

lstm_model.add(Dense(6, activation='softmax'))

lstm_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

lstm_model.summary()

train['label'] = train['label']
validation['label'] = validation['label']
train.head()

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='auto', patience=5,
                                                  restore_best_weights=True)

epochs = 100
hist = lstm_model.fit(Padded_train, train['label'], epochs=epochs,
                      validation_data=(Padded_val, validation['label']),
                      callbacks=[early_stopping])

plt.figure(figsize=(15, 8))
plt.plot(hist.history['loss'], label='Train Loss')
plt.plot(hist.history['val_loss'], label='Validation Loss')
plt.title('Train and Validation Loss Graphs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

test['text_tokenized'] = test['text'].apply(tokenization)
test['text_stop'] = test['text_tokenized'].apply(stopwords_remove)
test['text_lemmatized'] = test['text_stop'].apply(lemmatization)
test['text_cleaned'] = test['text_lemmatized'].str.join(' ')

Tokenized_test = tokenizer.texts_to_sequences(test['text_cleaned'])
Padded_test = pad_sequences(Tokenized_test, maxlen=maxlen, padding='pre')

test['label'] = test['label']

test_evaluate = lstm_model.evaluate(Padded_test, test['label'])

test.head()

import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import pad_sequences

# Assuming you have imported necessary libraries, defined Tokenized_test, Padded_test, and test dataframe

# Make predictions on test data
predictions = lstm_model.predict(Padded_test)
predicted_labels = np.argmax(predictions, axis=1)  # Assuming the output is categorical

# Compute confusion matrix
cm = confusion_matrix(test['label'], predicted_labels)

# Plot confusion matrix as a heatmap
def plot_confusion_matrix(conf_matrix, class_names):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

# Define class names (labels)
class_names = np.unique(test['label'])

# Plot confusion matrix
plot_confusion_matrix(cm, class_names)

"""# Making predictions"""

def make_predictions(text_input):
    text_input = str(text_input)
    text_input = tokenization(text_input)
    text_input = stopwords_remove(text_input)
    text_input = lemmatization(text_input)
    text_input = ' '.join(text_input)
    text_input = tokenizer.texts_to_sequences([text_input])
    text_input = pad_sequences(text_input, maxlen=maxlen, padding='pre')
    text_input = np.argmax(lstm_model.predict(text_input))
    if text_input == 0:
        return 'Predicted Emotion: Sadness'
    elif text_input == 1:
        return 'Predicted Emotion: Joy'
    elif text_input == 2:
        return 'Predicted Emotion: Love'
    elif text_input == 3:
        return 'Predicted Emotion: Anger'
    elif text_input == 4:
        return 'Predicted Emotion: Fear'
    else:
        return 'Predicted Emotion: Surprise'
    return text_input

# label_ = {0: "Sadness", 1: "Joy", 2: "Love", 3: "Anger", 4: "Fear", 5: "Surprise"}
# test['label'] = test['label'].replace(label_)

# # Randomly chosen Test Dataset data points
# i = random.randint(0, len(test) - 1)

# print('Test Text:', test['text'][i])
# print(' ')
# print('Actual Emotion:', test['label'][i])
# make_predictions(test['text'][i])
# print('-'*50)
# print('Test Text:', test['text'][i+1])
# print(' ')
# print('Actual Emotion:', test['label'][i+1])
# make_predictions(test['text'][i+1])

ans = make_predictions('No one told you when to run, you missed the starting gun')
ans

ans2 = make_predictions("I just asked one question to confirm his request, and my boss bit my head off.")
ans2

ans3 = make_predictions("She’s flying high after the successful product launch.")
ans3

ans4 = make_predictions("I’m going to have the first meeting with a big client tomorrow, and I’m feeling butterflies in my stomach")
ans4

ans5 = make_predictions("Sometimes the people who appear to be the most confident are actually afraid of their own shadows.")
ans5

ans6 = make_predictions("I'm really impressed that Ashley can speak 7 languages, whereas I only speak one!")
ans6

ans7 = make_predictions("We are delighted that you will be coming to visit us. It will be so nice to have you here.")
ans7

ans8 = make_predictions("She’s flying high after the successful product launch.")
ans8

ans9 = make_predictions("I am anxious to hear back about the job interview I had on Friday. I hope I get the job!")
ans9

"""# Automatic speech recognition using Whisper"""

!pip install h5py
!pip install typing-extensions==4.9.0
!pip install cohere
!pip install wheel
!pip install openai

!pip install git+https://github.com/openai/whisper.git -q

import whisper

model = whisper.load_model("base")

model.device

import IPython
from IPython.display import Audio

import pickle

# save the iris classification model as a pickle file
model_pkl_file = "VED.pkl"

with open(model_pkl_file, 'wb') as file:
    pickle.dump(lstm_model, file)

"""# UI building using Gradio"""

!pip install jiwer
!pip install gradio==3.50 typing-extensions

!pip install fastapi==0.103.2
import gradio as gr
import time

def transcribe(audio_file):
    model = whisper.load_model("base")
    result = model.transcribe(audio_file)
    result_string = result['text']
    detected_emotion = make_predictions(result_string)
    # tt = result['text']
    # review=[]
    # review.append(tt)
    # sent = predict(str(result))

    # # result_string = str(strg)
    # Sentiment = predict_sentiment(strg)
    # return Sentiment
    return detected_emotion

gr.Interface(
    title='Audio based Sentiment analysis',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        "text"
    ],
    live=True, debug=True).launch(debug=True)
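Note on the model-building cell above: the three tf.keras.layers.Bidirectional(...) calls construct layers but never attach them to lstm_model, so only the Embedding, GlobalAvgPool1D, Dropout, and Dense layers are actually trained. Below is a minimal sketch of how the stacked bidirectional LSTMs could be wired in instead; the architecture is an assumption about the intent, not something this commit defines.

# Sketch only: one plausible wiring of the bidirectional LSTM stack.
# num_words and maxlen are taken from the notebook cells above.
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense

sketch_model = Sequential()
sketch_model.add(Embedding(num_words, 16, input_length=maxlen))
sketch_model.add(Bidirectional(LSTM(50, return_sequences=True)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Bidirectional(LSTM(40, return_sequences=True)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Bidirectional(LSTM(40)))
sketch_model.add(Dropout(0.3))
sketch_model.add(Dense(6, activation='softmax'))
sketch_model.compile(loss='sparse_categorical_crossentropy',
                     optimizer='adam', metrics=['accuracy'])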
utils.py
ADDED
@@ -0,0 +1,85 @@
import soundfile
import numpy as np
import librosa
import glob
import os
from sklearn.model_selection import train_test_split

# all emotions on RAVDESS dataset
int2emotion = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# we allow only these emotions
AVAILABLE_EMOTIONS = {
    "angry",
    "sad",
    "neutral",
    "happy"
}

def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result


def load_data(test_size=0.2):
    X, y = [], []
    for file in glob.glob("data/Actor_*/*.wav"):
        # get the base name of the audio file
        basename = os.path.basename(file)
        # get the emotion label
        emotion = int2emotion[basename.split("-")[2]]
        # we allow only AVAILABLE_EMOTIONS we set
        if emotion not in AVAILABLE_EMOTIONS:
            continue
        # extract speech features
        features = extract_feature(file, mfcc=True, chroma=True, mel=True)
        # add to data
        X.append(features)
        y.append(emotion)
    # split the data to training and testing and return it
    return train_test_split(np.array(X), y, test_size=test_size, random_state=7)
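As a usage sketch for the new utils.py: assuming the RAVDESS recordings are unpacked under data/Actor_*/ as load_data expects and scikit-learn is installed, the helpers could feed a simple classifier. The MLPClassifier and the file name "some_recording.wav" below are illustrative assumptions, not part of this Space.

# Usage sketch: feed the features from load_data() into a small scikit-learn model.
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

from utils import load_data, extract_feature

# load_data() returns the usual train_test_split tuple
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

clf = MLPClassifier(hidden_layer_sizes=(300,), max_iter=500)
clf.fit(X_train, y_train)
print("test accuracy:", accuracy_score(y_test, clf.predict(X_test)))

# Single-file prediction, mirroring the docstring example in extract_feature
features = extract_feature("some_recording.wav", mfcc=True, chroma=True, mel=True)
print("predicted emotion:", clf.predict(features.reshape(1, -1))[0])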