"""Audio I/O and spectrogram helpers (waveform <-> normalised dB spectrograms)."""

import functools
import logging

import librosa
import librosa.filters
import numpy as np
import scipy
# Imported explicitly: a bare `import scipy` does not guarantee that the
# `scipy.signal` submodule used below has been loaded.
import scipy.signal
import soundfile as sf
from scipy.io import wavfile

logger = logging.getLogger(__name__)

# Analysis parameters shared by _stft(), _build_mel_basis() and get_hop_size().
_N_FFT = 800
_HOP_SIZE = 200
_WIN_SIZE = 800
_N_MELS = 80
_FMIN = 80
_FMAX = 7600
_PREEMPHASIS = 0.97
# Bottom of the dB scale: _amp_to_db() floors amplitudes at 1e-5, i.e. -100 dB.
_MIN_LEVEL_DB = -100


def load_wav(path, sr):
    """Load the audio file at ``path`` with librosa, requesting rate ``sr``.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.
        sr: Target sampling rate handed to ``librosa.load``.

    Returns:
        The waveform returned by ``librosa.load`` (the returned rate is
        discarded).

    Raises:
        Exception: Any error raised by librosa is logged and re-raised.
    """
    try:
        wav, _ = librosa.load(path, sr=sr)
    except Exception as e:
        logger.error(f"Error al cargar audio {path}: {str(e)}")
        raise
    return wav


def save_wav(wav, path, sr):
    """Peak-normalise ``wav`` and write it to ``path`` as 16-bit PCM.

    The signal is scaled so that its largest absolute sample maps to 32767.
    The peak is floored at 0.01 so that near-silent input is not amplified
    without bound. The caller's array is left untouched.

    Args:
        wav: Array-like waveform.
        path: Destination file, passed to ``scipy.io.wavfile.write``.
        sr: Sampling rate written to the file header.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        samples = np.asarray(wav)
        peak = max(0.01, np.max(np.abs(samples)))
        # Build a scaled copy instead of `wav *= ...`: the in-place form
        # modified the caller's data and failed for integer-typed arrays.
        scaled = samples * (32767 / peak)
        wavfile.write(path, sr, scaled.astype(np.int16))
    except Exception as e:
        logger.error(f"Error al guardar audio {path}: {str(e)}")
        raise


def save_wavenet_wav(wav, path, sr):
    """Write ``wav`` to ``path`` as float32 samples via ``soundfile``.

    Unlike :func:`save_wav`, no rescaling is applied.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        sf.write(path, wav.astype(np.float32), sr)
    except Exception as e:
        logger.error(f"Error al guardar audio wavenet {path}: {str(e)}")
        raise


def preemphasis(wav, k, preemphasize=True):
    """Apply the FIR filter ``y[n] = x[n] - k * x[n-1]`` to ``wav``.

    Returns ``wav`` unchanged when ``preemphasize`` is false.
    """
    if not preemphasize:
        return wav
    return scipy.signal.lfilter([1, -k], [1], wav)


def inv_preemphasis(wav, k, inv_preemphasize=True):
    """Undo :func:`preemphasis` with the IIR filter ``y[n] = x[n] + k * y[n-1]``.

    Returns ``wav`` unchanged when ``inv_preemphasize`` is false.
    """
    if not inv_preemphasize:
        return wav
    return scipy.signal.lfilter([1], [1, -k], wav)


def get_hop_size():
    """Return the STFT hop length, in samples, used by this module."""
    return _HOP_SIZE


def linearspectrogram(wav):
    """Return the normalised dB magnitude spectrogram of ``wav``.

    The waveform is pre-emphasised, transformed with :func:`_stft`, converted
    to decibels and mapped to ``[0, 1]`` by :func:`_normalize`.
    """
    D = _stft(preemphasis(wav, _PREEMPHASIS))
    S = _amp_to_db(np.abs(D))
    return _normalize(S)


def melspectrogram(wav, sr=16000):
    """Return the normalised dB mel spectrogram of ``wav``.

    Args:
        wav: Input waveform.
        sr: Sampling rate used to build the mel filterbank.

    Returns:
        Array with values in ``[0, 1]``; the mel axis (80 bands) is placed
        first when the shape check below can establish it.

    Raises:
        ValueError: If the dB spectrogram contains NaN values.
    """
    D = _stft(preemphasis(wav, _PREEMPHASIS))
    S = _amp_to_db(_linear_to_mel(np.abs(D), sr))

    if np.isnan(S).any():
        raise ValueError("El espectrograma contiene valores NaN")

    S = _normalize(S)

    # Ensure the expected (80, T) layout.
    if S.ndim == 1:
        S = S.reshape(_N_MELS, -1)
    elif S.shape[0] != _N_MELS:
        S = S.T

    return S


def _stft(y):
    """Short-time Fourier transform of ``y`` with the module's parameters."""
    return librosa.stft(
        y=y,
        n_fft=_N_FFT,
        hop_length=_HOP_SIZE,
        win_length=_WIN_SIZE,
    )


def _linear_to_mel(spectrogram, sr):
    """Project a linear-frequency magnitude spectrogram onto the mel basis."""
    return np.dot(_build_mel_basis(sr), spectrogram)


@functools.lru_cache(maxsize=8)
def _build_mel_basis(sr):
    """Return the mel filterbank for sampling rate ``sr``.

    The result is memoised per ``sr`` so repeated spectrogram calls do not
    rebuild the filterbank. The returned array is shared between calls and
    must be treated as read-only.
    """
    # NOTE(review): _FMAX is fixed at 7600 Hz regardless of `sr`; confirm that
    # callers never pass a sampling rate whose Nyquist frequency is below it.
    return librosa.filters.mel(
        sr=sr,
        n_fft=_N_FFT,
        n_mels=_N_MELS,
        fmin=_FMIN,
        fmax=_FMAX,
    )


def _amp_to_db(x):
    """Convert amplitudes to decibels, flooring the input at 1e-5 (-100 dB)."""
    return 20 * np.log10(np.maximum(1e-5, x))


def _db_to_amp(x):
    """Convert decibels back to linear amplitude."""
    return np.power(10.0, x * 0.05)


def _normalize(S):
    """Map decibel values from ``[-100, 0]`` to ``[0, 1]``, clipping outliers."""
    return np.clip((S - _MIN_LEVEL_DB) / -_MIN_LEVEL_DB, 0, 1)


def _denormalize(D):
    """Map values from ``[0, 1]`` back to decibels in ``[-100, 0]``."""
    return (D * -_MIN_LEVEL_DB) + _MIN_LEVEL_DB