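# app.py -- last change "Update app.py" (commit 1db643b, verified), uploaded by seayala.
# Hugging Face file-viewer residue (raw / history / blame links, size 2.48 kB) folded into this header.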
import gradio as gr
import tensorflow as tf
import librosa
import numpy as np
import tempfile
# Class names, in index order: classify_audio looks up the model's argmax
# index in this list, so the ordering must not be changed.
labels = [
    "cat", "house", "marvin", "nine", "one", "eight",
    "three", "five", "zero", "four", "tree", "wow",
    "six", "off", "bed", "seven", "down", "happy",
    "on", "yes", "two", "right", "left", "stop",
    "go", "no", "sheila", "up", "bird", "dog",
]
def extract_features(
    file_name,
    *,
    sample_rate=16000,
    n_mels=257,
    n_fft=512,
    hop_length=256,
    n_frames=97,
):
    """Convert an audio file into a normalized log-mel spectrogram.

    Args:
        file_name: Path of the audio file to read.
        sample_rate: Rate (Hz) the audio is resampled to while loading.
        n_mels: Number of mel bands; also the size of axis 0 of the result.
        n_fft: FFT window length passed to the mel spectrogram.
        hop_length: Hop between successive frames, in samples.
        n_frames: Number of time frames the spectrogram is padded or
            truncated to (axis 1 of the result).

    Returns:
        An array of shape ``(n_mels, n_frames, 1)``, or ``None`` if anything
        goes wrong while reading or transforming the file (the error is
        printed, not raised).
    """
    try:
        # Load and resample to the requested rate.
        audio, loaded_rate = librosa.load(file_name, sr=sample_rate)

        mel_power = librosa.feature.melspectrogram(
            y=audio,
            sr=loaded_rate,
            n_mels=n_mels,
            n_fft=n_fft,
            hop_length=hop_length,
        )

        # Power -> dB, relative to the loudest bin of this clip.
        log_mel = librosa.power_to_db(mel_power, ref=np.max)

        # Force the exact (n_mels, n_frames) size by padding or truncating.
        log_mel = librosa.util.fix_length(log_mel, size=n_mels, axis=0)
        log_mel = librosa.util.fix_length(log_mel, size=n_frames, axis=1)

        # Standardize to zero mean / unit variance. A constant spectrogram
        # (e.g. pure silence) has zero spread; dividing by it would fill the
        # features with NaN, so in that case only the mean is removed.
        centered = log_mel - np.mean(log_mel)
        spread = np.std(log_mel)
        if spread > 0:
            centered = centered / spread

        # Append a trailing channel axis.
        return centered[..., np.newaxis]
    except Exception as e:
        # Deliberate best-effort boundary: callers check for None.
        print(f"Error encountered while parsing file: {file_name}")
        print(e)
        return None
# Keras model shared by every request; filled in on first use.
_MODEL = None


def _get_model():
    """Return the classifier, reading 'my_model.h5' from disk only once."""
    global _MODEL
    if _MODEL is None:
        _MODEL = tf.keras.models.load_model('my_model.h5', compile=False)
    return _MODEL


def classify_audio(audio_file):
    """Predict the spoken-word label for an uploaded audio file.

    Args:
        audio_file: Path to the audio file (the Gradio input is configured
            with ``type="filepath"``), or ``None`` when nothing was provided.

    Returns:
        The predicted entry of ``labels``, or the string
        ``"Error al procesar el audio"`` if the audio could not be processed.
    """
    print(f"Tipo de audio_file: {type(audio_file)}")

    # Nothing uploaded: report the same error as an unreadable file without
    # going through the feature extractor.
    if audio_file is None:
        return "Error al procesar el audio"

    # The input is already a path, so no temp-file round trip is needed.
    features = extract_features(audio_file)
    if features is None:
        return "Error al procesar el audio"

    # Add the leading batch axis.
    # NOTE(review): an earlier comment here gave the shape as (1, 97, 257, 1),
    # but extract_features pads to 257 rows x 97 columns, which yields
    # (1, 257, 97, 1). Confirm which layout my_model.h5 expects.
    batch = features[np.newaxis, ...]

    # Loading the model per request was the dominant cost; reuse the cached one.
    model = _get_model()

    # Run inference on the CPU.
    with tf.device('/CPU:0'):
        prediction = model.predict(batch)

    predicted_index = int(np.argmax(prediction))
    return labels[predicted_index]
# Web UI: one audio input, handed to classify_audio as a file path, and a
# plain-text output showing the returned label.
iface = gr.Interface(
    classify_audio,
    gr.Audio(type="filepath"),
    "text",
    title="Clasificación de audio simple",
    description="Sube un archivo de audio para clasificarlo.",
)

# Start the Gradio app. This is a module-level call, so it also runs when the
# file is imported.
iface.launch()