Spaces:

WeixuanYuan
/

Sound_VAE

Build error

App Files Files Community

Sound_VAE / tools.py

WeixuanYuan

Upload 31 files

b88cc47 over 1 year ago

raw

history blame contribute delete

2.17 kB

	import numpy as np
	import tensorflow as tf
	import matplotlib.pyplot as plt
	import matplotlib
	import librosa
	from scipy.io.wavfile import write

	# Small constant added to a denominator so the division stays finite when the
	# denominator would otherwise be zero (used by VAE_out_put_to_spc).
	k = 1e-16


	def np_log10(x):
	    """Return the base-10 logarithm of ``x + 1e-16``.

	    The value is computed with the change-of-base identity
	    ``log(v) / log(10)``. The 1e-16 offset is added to the argument before
	    the logarithm, so an input of exactly zero yields a finite result.
	    """
	    return np.log(x + 1e-16) / np.log(10)


	def sigmoid(x):
	    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

	    Args:
	        x: A scalar or NumPy array.

	    Returns:
	        The sigmoid of ``x``, element-wise, with values in [0, 1].
	    """
	    # 1 / (1 + e^-x) == exp(-log(e^0 + e^-x)). np.logaddexp evaluates the
	    # inner logarithm without ever forming e^-x on its own, so large negative
	    # inputs no longer overflow (the direct formula emits an overflow
	    # RuntimeWarning, or raises under np.errstate(over='raise')).
	    return np.exp(-np.logaddexp(0, -x))


	def inv_sigmoid(s):
	    """Return the logit ``log(s / (1 - s))`` of ``s``, guarded at both ends.

	    Args:
	        s: A scalar or NumPy array, expected to lie in [0, 1].

	    Returns:
	        The logit of ``s``, element-wise. The result is finite at both
	        ``s == 0`` and ``s == 1``.
	    """
	    eps = 1e-16
	    # The eps inside the log keeps s == 0 finite. The eps in the denominator
	    # does the same for s == 1, which otherwise divides by zero
	    # (ZeroDivisionError for Python floats, inf plus a warning for arrays).
	    odds = s / (1 - s + eps)
	    return np.log(odds + eps)


	def spc_to_VAE_input(spc):
	    """Squash ``spc`` with ``spc / (1 + spc)``.

	    For non-negative input the result lies in [0, 1).
	    """
	    denominator = 1 + spc
	    return spc / denominator


	def VAE_out_put_to_spc(o):
	    """Undo 'spc_to_VAE_input' by computing ``o / (1 - o + k)``.

	    The module-level constant ``k`` is added to the denominator so that
	    ``o == 1`` does not divide by zero.
	    """
	    denominator = 1 - o + k
	    return o / denominator


	def denoise(spc, threshold=2e-5):
	    """Filter background noise by subtracting a floor and clipping at zero.

	    Args:
	        spc: A scalar or NumPy array of spectrogram values.
	        threshold: Value subtracted from every element before clipping.
	            Defaults to 2e-5, the constant that was previously hard-coded.

	    Returns:
	        ``max(0, spc - threshold)``, element-wise.
	    """
	    return np.maximum(0, spc - threshold)


	# Hop and window lengths. save_results passes the same two values as literals
	# to librosa.griffinlim.
	# NOTE(review): these module-level names are not referenced by name anywhere
	# in the visible code -- confirm whether other modules import them.
	hop_length = 256
	win_length = 1024


	def np_power_to_db(S, amin=1e-16, top_db=80.0):
	    """Convert ``S`` to decibels relative to its own maximum.

	    Args:
	        S: A scalar or NumPy array of values to convert.
	        amin: Lower bound applied to ``S`` (and to the reference) before the
	            logarithm is taken.
	        top_db: Width of the retained range; results lower than
	            ``max(result) - top_db`` are raised to that value.

	    Returns:
	        ``10 * log10(S) - 10 * log10(max(S))``, clipped from below as
	        described for ``top_db``.
	    """
	    # The reference level is the largest value of the input itself.
	    peak = np.max(S)

	    # Element-wise max: floor every element at amin before the logarithm.
	    floored = np.maximum(amin, S)
	    db = 10.0 * np_log10(floored)
	    db -= 10.0 * np_log10(np.maximum(amin, peak))

	    # Limit the range to top_db below the loudest element.
	    lowest_allowed = np.max(db) - top_db
	    return np.maximum(db, lowest_allowed)


	def show_spc(spc, resolution=(512, 256)):
	    """Display a spectrogram on a dB scale with matplotlib.

	    Args:
	        spc: Spectrogram values that can be reshaped to ``resolution``.
	        resolution: Target shape passed to ``np.reshape``.
	    """
	    magnitude = np.abs(np.reshape(spc, resolution))
	    db_image = np_power_to_db(magnitude)
	    # Reverse the row order before handing the image to imshow.
	    plt.imshow(np.flipud(db_image))
	    plt.show()


	def save_results(spectrogram, spectrogram_image_path, waveform_path):
	    """Save a spectrogram as an image and as a waveform rebuilt by Griffin-Lim.

	    Args:
	        spectrogram: Values that can be reshaped to (512, 256). Their square
	            root is used as the magnitude handed to Griffin-Lim.
	        spectrogram_image_path: Where the dB-scaled image is written.
	        waveform_path: Where the reconstructed audio is written as a WAV
	            file with a sample rate of 16000.
	    """
	    # Image: dB-scale the absolute values, then lay them out as 512 x 256.
	    db_image = np.reshape(np_power_to_db(np.abs(spectrogram)), (512, 256))
	    plt.imsave(spectrogram_image_path, db_image, vmin=-100, vmax=0,
	               origin='lower')

	    # Waveform: place the magnitudes in the first 512 rows of a 513-row array
	    # whose last row stays zero (presumably to match the 513 frequency bins
	    # of a 1024-point FFT -- confirm against the analysis settings).
	    amplitude = np.sqrt(np.reshape(spectrogram, (512, 256)))
	    padded = np.zeros((513, 256))
	    padded[:512, :] = padded[:512, :] + amplitude
	    waveform = librosa.griffinlim(padded, n_iter=32, hop_length=256, win_length=1024)
	    write(waveform_path, 16000, waveform)