import librosa
import numpy as np
import tensorflow as tf

# The model expects MFCC input of shape 259 x 13 (frames x coefficients).
model_path = "Genre_classifier_model.h5"
class _Genre_spotting_service():

    instance = None
    model = None
    mappings = ["blues", "classical", "country", "disco", "hip-hop", "jazz",
                "metal", "pop", "reggae", "rock"]

    def __init__(self, model_path):
        self.model = tf.keras.models.load_model(model_path)

    def predict(self, file_path):
        # Extract MFCCs and add a batch dimension -> shape (1, 259, 13)
        input_data = self.preprocess_audio(file_path)
        input_data = input_data[np.newaxis, ...]

        # Map the highest-probability class index to its genre label
        predictions = self.model.predict(input_data)
        predicted_index = int(np.argmax(predictions, axis=1)[0])
        return [self.mappings[predicted_index]]
    def preprocess_audio(self, file_path, target_frames=259, n_mfcc=13, sr=22050,
                         n_fft=2048, hop_length=512):
        # Load audio file
        y, sr = librosa.load(file_path, sr=sr)

        # Compute MFCCs -> shape (n_mfcc, n_frames)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft,
                                     hop_length=hop_length)

        # Pad or truncate along the time axis so the output always has target_frames
        if mfccs.shape[1] < target_frames:
            pad_width = target_frames - mfccs.shape[1]
            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
        else:
            mfccs = mfccs[:, :target_frames]

        # Transpose to (target_frames, n_mfcc), matching the model's input layout
        return mfccs.T
def Genre_spotting_service():
    """Factory that returns a singleton instance of the genre-spotting service,
    loading the Keras model only on the first call."""
    if _Genre_spotting_service.instance is None:
        _Genre_spotting_service.instance = _Genre_spotting_service(model_path)
    return _Genre_spotting_service.instance
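

# A minimal sanity-check sketch, assuming Genre_classifier_model.h5 is present
# (the factory above loads the model on first use) and that soundfile (a librosa
# dependency) is installed. It writes a synthetic 6-second tone and verifies that
# preprocess_audio() returns the (259, 13) array the model expects. The function
# name and the temporary file path are illustrative, not part of the original code.
def _check_preprocess_shape(tmp_path="_sanity_tone.wav", duration_s=6.0, sr=22050):
    import soundfile as sf

    # 440 Hz sine tone, scaled down to avoid clipping
    t = np.linspace(0.0, duration_s, int(sr * duration_s), endpoint=False)
    sf.write(tmp_path, 0.1 * np.sin(2.0 * np.pi * 440.0 * t), sr)

    service = Genre_spotting_service()
    mfccs = service.preprocess_audio(tmp_path)
    assert mfccs.shape == (259, 13), f"unexpected MFCC shape: {mfccs.shape}"
    return mfccs.shape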
if __name__ == "__main__":
    # Quick manual test: classify a local clip (path taken from the original example)
    genre_service = Genre_spotting_service()
    predicted_genre = genre_service.predict("10.mp3")
    print(predicted_genre)