# Sound_VAE / tools.py
# WeixuanYuan's picture
# Upload 31 files
# b88cc47
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib
import librosa
from scipy.io.wavfile import write
# Small constant added to the denominator in VAE_out_put_to_spc so that
# the division stays finite when the network output equals 1.
k = 1e-16
def np_log10(x):
    """Return the base-10 logarithm of ``x + 1e-16``.

    The 1e-16 offset keeps the result finite when ``x`` is zero. The value
    is computed as a natural logarithm divided by ``ln(10)``.
    """
    return np.log(x + 1e-16) / np.log(10)
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)
def inv_sigmoid(s):
    """Return the logit of ``s``: ``log(s / (1 - s) + 1e-16)``.

    The 1e-16 offset keeps the logarithm finite when ``s`` is zero.
    """
    odds = s / (1 - s)
    return np.log(odds + 1e-16)
def spc_to_VAE_input(spc):
    """Map ``spc`` to ``spc / (1 + spc)``.

    For non-negative input the result lies in the range [0, 1).
    """
    denominator = 1 + spc
    return spc / denominator
def VAE_out_put_to_spc(o):
    """Undo 'spc_to_VAE_input': map ``o`` back to ``o / (1 - o)``.

    The module-level constant ``k`` is added to the denominator so the
    division stays finite when ``o`` equals 1.
    """
    denominator = 1 - o + k
    return o / denominator
def denoise(spc):
    """Subtract a fixed noise floor of 2e-5 and clamp the result at zero.

    (Not used.)
    """
    shifted = spc - (2e-5)
    return np.maximum(0, shifted)
# Hop and window sizes; they match the values used for the Griffin-Lim
# reconstruction in save_results.
hop_length = 256
win_length = 1024
def np_power_to_db(S, amin=1e-16, top_db=80.0):
    """Convert power values ``S`` to decibels relative to the maximum of ``S``.

    Values are floored at ``amin`` before the logarithm, and the result is
    clipped so that nothing lies more than ``top_db`` below its maximum.
    """
    # The reference is the global maximum of the input (not a fixed value).
    peak = np.max(S)
    # np.maximum works element-wise: every entry is floored at amin.
    db = 10.0 * np_log10(np.maximum(amin, S))
    # Subtract the reference level in place so the peak maps to 0 dB.
    db -= 10.0 * np_log10(np.maximum(amin, peak))
    lower_bound = np.max(db) - top_db
    return np.maximum(db, lower_bound)
def show_spc(spc, resolution=(512, 256)):
    """Display a spectrogram on a dB scale.

    ``spc`` is reshaped to ``resolution``, its absolute value is converted
    with 'np_power_to_db', and the result is shown with matplotlib.
    """
    grid = np.reshape(spc, resolution)
    log_spectrum = np_power_to_db(np.abs(grid))
    # Reverse the row order before drawing, so the first row ends up at the
    # bottom under matplotlib's default image origin.
    flipped = np.flipud(log_spectrum)
    plt.imshow(flipped)
    plt.show()
def save_results(spectrogram, spectrogram_image_path, waveform_path):
    """Save the input 'spectrogram' and its waveform (reconstructed by Griffin-Lim)
    to the paths provided by 'spectrogram_image_path' and 'waveform_path'.

    Args:
        spectrogram: Array-like holding 512 * 256 values; it is reshaped to
            (512, 256). Its square root is used as the STFT magnitude for
            the reconstruction.
        spectrogram_image_path: Destination of the dB-scaled image.
        waveform_path: Destination of the WAV file, written with a sample
            rate of 16000.
    """
    # Take the absolute value once and use it for both outputs: np.sqrt of a
    # negative entry would be NaN and would poison the Griffin-Lim input.
    magnitude_spectrum = np.abs(np.reshape(spectrogram, (512, 256)))

    # save image
    log_spc = np_power_to_db(magnitude_spectrum)
    matplotlib.pyplot.imsave(spectrogram_image_path, log_spc, vmin=-100, vmax=0,
                             origin='lower')

    # save waveform
    # Pad to 513 rows with a zero row at the end; librosa infers
    # n_fft = 2 * (rows - 1) = 1024 from this shape.
    abs_spec = np.zeros((513, 256))
    abs_spec[:512, :] = np.sqrt(magnitude_spectrum)
    # Use the module-level framing constants instead of repeating literals.
    rec_signal = librosa.griffinlim(abs_spec, n_iter=32, hop_length=hop_length,
                                    win_length=win_length)
    write(waveform_path, 16000, rec_signal)