import tensorflow.keras as keras
import numpy as np
import librosa
import random
#import tensorflow_addons

# Sample rate (Hz) that every audio file is resampled to on load.
SAMPLE_RATE = 22050


class _Emotion_spotting_service():
    """Predict an emotion label for an audio file with a saved Keras model.

    The model is loaded once in ``__init__``; ``predict`` turns an audio
    file into a log-magnitude spectrogram and returns the label whose
    score is highest.
    """

    model = None
    #instance = None
    # Class labels, indexed by the argmax of the model output. The strings
    # (including their leading spaces) are returned to callers verbatim.
    mapping = [' amazement', ' solemnity', ' tenderness', ' nostalgia', ' calmness', ' power', ' joyfulness', ' tension',' sadness']

    def __init__(self, model_path):
        """Load the Keras model stored at ``model_path``."""
        self.model = keras.models.load_model(model_path)

    def predict(self, file_path):
        """Return the predicted emotion label for the audio at ``file_path``.

        Args:
            file_path: Path of an audio file readable by ``librosa.load``.

        Returns:
            One entry of ``mapping``.
        """
        log_spectrogram = self.preprocess(file_path)

        X = np.array(log_spectrogram).astype("float32")
        # The model is fed a batch, so add a leading batch axis of size 1.
        X = np.expand_dims(X, axis=0)

        # Do predictions
        num_predictions = self.model.predict(X)
        # argmax runs over the flattened output; with a single-item batch
        # that is the index of the highest-scoring class.
        prediction = np.argmax(num_predictions)
        predicted_keyword = self.mapping[prediction]
        return predicted_keyword

    def preprocess(self, file_path):
        """Turn an audio file into a fixed-size log spectrogram.

        The audio is loaded at ``SAMPLE_RATE``, peak-normalised, and fitted
        to exactly 10 seconds: shorter clips are zero-padded at the end,
        longer clips are cut into consecutive non-overlapping 10-second
        segments and one segment is chosen at random (so repeated calls on
        a long file may return different excerpts).

        The spectrogram uses n_fft=2048 and hop_length=512; the last
        frequency bin is dropped. Per the original author's note the
        result is 1024 x 431.

        Args:
            file_path: Path of an audio file readable by ``librosa.load``.

        Returns:
            2-D array of dB-scaled STFT magnitudes.
        """
        signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)
        signal_normalized = librosa.util.normalize(signal)

        len_to_check = 10 * SAMPLE_RATE
        num_samples = len(signal_normalized)

        if num_samples < len_to_check:
            # Pad with trailing zeros. (`array + [0] * n` would be an
            # element-wise add on a NumPy array, not a concatenation.)
            signal_normalized = np.pad(
                signal_normalized, (0, len_to_check - num_samples)
            )
        elif num_samples > len_to_check:
            # Floor division guarantees every candidate segment is full
            # length, so choosing a random segment index is enough.
            num_segments = num_samples // len_to_check
            start = random.randrange(num_segments) * len_to_check
            # Slice the normalised signal so long clips receive the same
            # scaling as short and exactly-10-second clips.
            signal_normalized = signal_normalized[start:start + len_to_check]

        # [:-1] drops the last frequency bin (n_fft // 2 + 1 -> n_fft // 2).
        stft = librosa.stft(signal_normalized, n_fft=2048, hop_length=512)[:-1]
        spectrogram = np.abs(stft)
        log_spectrogram = librosa.amplitude_to_db(spectrogram)

        return log_spectrogram


# def Emotion_spotting_service():
#     if _Emotion_spotting_service.instance == None:
#         _Emotion_spotting_service.instance = _Emotion_spotting_service()
#         _Emotion_spotting_service.model = keras.models.load_model("ERM.h5")
#     return _Emotion_spotting_service.instance

# if __name__ == "__main__":
#     emotion_service = _Emotion_spotting_service("emotion_model.h5")
#     predicted_word = emotion_service.predict("10.mp3")
#     print(predicted_word)