Style-Bert-VITS2-JVNV

Running

App Files Files Community

Style-Bert-VITS2-JVNV / utils.py

litagin

init

78b6bf0 almost 2 years ago

raw

history blame

15.2 kB

	import argparse
	import glob
	import json
	import logging
	import os
	import re
	import subprocess

	import numpy as np
	import torch
	from huggingface_hub import hf_hub_download
	from safetensors import safe_open
	from safetensors.torch import save_file
	from scipy.io.wavfile import read

	from tools.log import logger

	# Becomes True once a plotting helper in this module has switched matplotlib to the "Agg" backend.
	MATPLOTLIB_FLAG = False


	def download_checkpoint(
	    dir_path, repo_config, token=None, regex="G_*.pth", mirror="openi"
	):
	    """Download the initial checkpoints into ``dir_path`` unless one is already there.

	    Args:
	        dir_path: Directory searched for existing checkpoints and used as the
	            download target.
	        repo_config: Mapping with a ``"repo_id"`` entry naming the Hugging Face
	            repository to download from.
	        token: Optional access token forwarded to ``hf_hub_download``.
	        regex: Glob pattern (despite the name) used to detect existing
	            checkpoints in ``dir_path``.
	        mirror: Unused; kept so existing callers keep working.
	    """
	    repo_id = repo_config["repo_id"]
	    if glob.glob(os.path.join(dir_path, regex)):
	        print("Use existed model, skip downloading.")
	        return
	    for file in ("DUR_0.pth", "D_0.pth", "G_0.pth"):
	        # Forward the token: it used to be accepted but silently ignored.
	        hf_hub_download(
	            repo_id,
	            file,
	            local_dir=dir_path,
	            local_dir_use_symlinks=False,
	            token=token,
	        )


	def load_checkpoint(
	    checkpoint_path, model, optimizer=None, skip_optimizer=False, for_infer=False
	):
	    """Load a ``torch.save`` checkpoint into ``model`` (and optionally ``optimizer``).

	    Args:
	        checkpoint_path: Path of the checkpoint file; must exist.
	        model: Module to load into. If it has a ``module`` attribute, the
	            wrapped module is used.
	        optimizer: Optimizer to restore, or None to leave optimizer state alone.
	        skip_optimizer: When True the optimizer state is never restored.
	        for_infer: When True, ``enc_q`` parameters that are missing from the
	            checkpoint (or have a different shape) are skipped silently.

	    Returns:
	        Tuple ``(model, optimizer, learning_rate, iteration)``.
	    """
	    assert os.path.isfile(checkpoint_path)
	    # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
	    checkpoint_dict = torch.load(checkpoint_path, map_location="cpu")
	    iteration = checkpoint_dict["iteration"]
	    learning_rate = checkpoint_dict["learning_rate"]
	    logger.info(
	        f"Loading model and optimizer at iteration {iteration} from {checkpoint_path}"
	    )

	    # Restore the optimizer only when there is one to restore into. The old
	    # fallback branch ran when ``optimizer is None`` and then dereferenced it,
	    # raising AttributeError for every call without an optimizer.
	    if optimizer is not None and not skip_optimizer:
	        saved_optimizer = checkpoint_dict.get("optimizer")
	        if saved_optimizer is not None:
	            optimizer.load_state_dict(saved_optimizer)
	        else:
	            logger.warning(
	                f"No optimizer state in the checkpoint {checkpoint_path}, keeping the current optimizer state"
	            )

	    saved_state_dict = checkpoint_dict["model"]
	    target = model.module if hasattr(model, "module") else model
	    state_dict = target.state_dict()

	    new_state_dict = {}
	    for k, v in state_dict.items():
	        saved = saved_state_dict.get(k)
	        if saved is not None and saved.shape == v.shape:
	            new_state_dict[k] = saved
	            continue

	        # The key is missing from the checkpoint or its shape differs.
	        if "ja_bert_proj" in k:
	            # For upgrading from the old version
	            v = torch.zeros_like(v)
	            logger.warning(
	                f"Seems you are using the old version of the model, the {k} is automatically set to zero for backward compatibility"
	            )
	        elif "enc_q" in k and for_infer:
	            continue
	        elif saved is None:
	            logger.error(f"{k} is not in the checkpoint {checkpoint_path}")
	        else:
	            logger.error(
	                f"{k} has shape {tuple(saved.shape)} in the checkpoint {checkpoint_path}, expected {tuple(v.shape)}"
	            )

	        # Fall back to the model's current (or zeroed) value.
	        new_state_dict[k] = v

	    target.load_state_dict(new_state_dict, strict=False)

	    logger.info("Loaded '{}' (iteration {})".format(checkpoint_path, iteration))

	    return model, optimizer, learning_rate, iteration


	def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
	    """Write model weights, optimizer state, learning rate and iteration to ``checkpoint_path``.

	    If ``model`` has a ``module`` attribute, the wrapped module's weights are saved.
	    """
	    logger.info(
	        "Saving model and optimizer state at iteration {} to {}".format(
	            iteration, checkpoint_path
	        )
	    )
	    unwrapped = model.module if hasattr(model, "module") else model
	    payload = {
	        "model": unwrapped.state_dict(),
	        "iteration": iteration,
	        "optimizer": optimizer.state_dict(),
	        "learning_rate": learning_rate,
	    }
	    torch.save(payload, checkpoint_path)


	def save_safetensors(model, iteration, checkpoint_path, is_half=False, for_infer=False):
	    """Save the model weights to ``checkpoint_path`` in safetensors format.

	    Args:
	        model: Module to save. If it has a ``module`` attribute, the wrapped
	            module's weights are saved.
	        iteration: Stored alongside the weights under the key ``"iteration"``.
	        checkpoint_path: Output file path.
	        is_half: When True every tensor is converted with ``.half()`` first.
	        for_infer: When True, keys containing ``"enc_q"`` are left out.
	    """
	    source = model.module if hasattr(model, "module") else model
	    state_dict = source.state_dict()

	    new_dict = {}
	    for k, tensor in state_dict.items():
	        if for_infer and "enc_q" in k:
	            continue
	        new_dict[k] = tensor.half() if is_half else tensor
	    new_dict["iteration"] = torch.LongTensor([iteration])

	    save_file(new_dict, checkpoint_path)
	    # Log only after the write succeeded, so a failed save is never reported as done.
	    logger.info(f"Saved safetensors to {checkpoint_path}")


	def load_safetensors(checkpoint_path, model, for_infer=False):
	    """
	    Load a safetensors file into ``model`` (non-strict) and return ``(model, iteration)``.

	    ``iteration`` is read from the ``"iteration"`` entry of the file and is None
	    when the file has no such entry. Missing and unexpected keys are logged as
	    warnings, except ``enc_q*`` keys when ``for_infer`` is set and the
	    ``"iteration"`` entry itself.
	    """
	    loaded = {}
	    iteration = None
	    with safe_open(checkpoint_path, framework="pt", device="cpu") as handle:
	        for name in handle.keys():
	            tensor = handle.get_tensor(name)
	            if name == "iteration":
	                iteration = tensor.item()
	            loaded[name] = tensor

	    target = model.module if hasattr(model, "module") else model
	    outcome = target.load_state_dict(loaded, strict=False)

	    for name in outcome.missing_keys:
	        if not (name.startswith("enc_q") and for_infer):
	            logger.warning(f"Missing key: {name}")
	    for name in outcome.unexpected_keys:
	        if name != "iteration":
	            logger.warning(f"Unexpected key: {name}")

	    if iteration is not None:
	        logger.info(f"Loaded '{checkpoint_path}' (iteration {iteration})")
	    else:
	        logger.info(f"Loaded '{checkpoint_path}'")
	    return model, iteration


	def summarize(
	    writer,
	    global_step,
	    scalars=None,
	    histograms=None,
	    images=None,
	    audios=None,
	    audio_sampling_rate=22050,
	):
	    """Forward the given tag -> value mappings to ``writer`` at ``global_step``.

	    Args:
	        writer: Object providing ``add_scalar``, ``add_histogram``,
	            ``add_image`` and ``add_audio``.
	        global_step: Step value passed with every entry.
	        scalars, histograms, images, audios: Optional mappings of tag to value;
	            None is treated as empty. Images are written with
	            ``dataformats="HWC"``.
	        audio_sampling_rate: Sampling rate passed to ``add_audio``.
	    """
	    # None defaults instead of shared mutable ``{}`` defaults.
	    for k, v in (scalars or {}).items():
	        writer.add_scalar(k, v, global_step)
	    for k, v in (histograms or {}).items():
	        writer.add_histogram(k, v, global_step)
	    for k, v in (images or {}).items():
	        writer.add_image(k, v, global_step, dataformats="HWC")
	    for k, v in (audios or {}).items():
	        writer.add_audio(k, v, global_step, audio_sampling_rate)


	def is_resuming(dir_path):
	    """Return True when ``dir_path`` holds at least one G_*.pth, D_*.pth and DUR_*.pth file."""
	    required_patterns = ("G_*.pth", "D_*.pth", "DUR_*.pth")
	    return all(
	        len(glob.glob(os.path.join(dir_path, pattern))) > 0
	        for pattern in required_patterns
	    )


	def latest_checkpoint_path(dir_path, regex="G_*.pth"):
	    """Return the matching checkpoint path whose file name carries the largest number.

	    Args:
	        dir_path: Directory to search.
	        regex: Glob pattern (despite the name) joined onto ``dir_path``.

	    Returns:
	        The path with the highest number formed by the digits of its file name.
	        File names without any digit rank lowest.

	    Raises:
	        ValueError: If no file matches.
	    """

	    def _step(path):
	        # Use the file name only, so digits in directory names cannot skew the
	        # order. A name without digits used to crash on int("").
	        digits = "".join(filter(str.isdigit, os.path.basename(path)))
	        return int(digits) if digits else -1

	    f_list = glob.glob(os.path.join(dir_path, regex))
	    if not f_list:
	        raise ValueError(f"No checkpoint found in {dir_path} with regex {regex}")
	    return sorted(f_list, key=_step)[-1]


	def plot_spectrogram_to_numpy(spectrogram):
	    """Render ``spectrogram`` with matplotlib and return the figure as an RGB uint8 array.

	    The first plotting call in the process switches matplotlib to the "Agg"
	    backend and raises the "matplotlib" logger level to WARNING.

	    Returns:
	        Array of shape (height, width, 3) holding the rendered figure.
	    """
	    global MATPLOTLIB_FLAG
	    if not MATPLOTLIB_FLAG:
	        import matplotlib

	        matplotlib.use("Agg")
	        MATPLOTLIB_FLAG = True
	        mpl_logger = logging.getLogger("matplotlib")
	        mpl_logger.setLevel(logging.WARNING)
	    import matplotlib.pylab as plt

	    fig, ax = plt.subplots(figsize=(10, 2))
	    im = ax.imshow(spectrogram, aspect="auto", origin="lower", interpolation="none")
	    plt.colorbar(im, ax=ax)
	    plt.xlabel("Frames")
	    plt.ylabel("Channels")
	    plt.tight_layout()

	    fig.canvas.draw()
	    # canvas.tostring_rgb() was removed from recent Matplotlib and binary-mode
	    # np.fromstring is deprecated; read the RGBA buffer and drop alpha instead.
	    rgba = np.asarray(fig.canvas.buffer_rgba())
	    # Copy so the result does not alias the canvas buffer once the figure is closed.
	    data = rgba[:, :, :3].copy()
	    plt.close(fig)
	    return data


	def plot_alignment_to_numpy(alignment, info=None):
	    """Render ``alignment`` (transposed) with matplotlib and return it as an RGB uint8 array.

	    The first plotting call in the process switches matplotlib to the "Agg"
	    backend and raises the "matplotlib" logger level to WARNING.

	    Args:
	        alignment: Object with a ``transpose()`` method whose result is shown
	            with ``imshow``.
	        info: Optional text appended below the x-axis label.

	    Returns:
	        Array of shape (height, width, 3) holding the rendered figure.
	    """
	    global MATPLOTLIB_FLAG
	    if not MATPLOTLIB_FLAG:
	        import matplotlib

	        matplotlib.use("Agg")
	        MATPLOTLIB_FLAG = True
	        mpl_logger = logging.getLogger("matplotlib")
	        mpl_logger.setLevel(logging.WARNING)
	    import matplotlib.pylab as plt

	    fig, ax = plt.subplots(figsize=(6, 4))
	    im = ax.imshow(
	        alignment.transpose(), aspect="auto", origin="lower", interpolation="none"
	    )
	    fig.colorbar(im, ax=ax)
	    xlabel = "Decoder timestep"
	    if info is not None:
	        xlabel += "\n\n" + info
	    plt.xlabel(xlabel)
	    plt.ylabel("Encoder timestep")
	    plt.tight_layout()

	    fig.canvas.draw()
	    # canvas.tostring_rgb() was removed from recent Matplotlib and binary-mode
	    # np.fromstring is deprecated; read the RGBA buffer and drop alpha instead.
	    rgba = np.asarray(fig.canvas.buffer_rgba())
	    # Copy so the result does not alias the canvas buffer once the figure is closed.
	    data = rgba[:, :, :3].copy()
	    plt.close(fig)
	    return data


	def load_wav_to_torch(full_path):
	    """Read a WAV file and return ``(samples as a float32 tensor, sampling rate)``."""
	    sampling_rate, samples = read(full_path)
	    as_float32 = samples.astype(np.float32)
	    waveform = torch.FloatTensor(as_float32)
	    return waveform, sampling_rate


	def load_filepaths_and_text(filename, split="|"):
	    """Read a UTF-8 text file and split every stripped line on ``split``.

	    Args:
	        filename: Path of the file to read.
	        split: Literal separator handed to ``str.split`` (not a regular
	            expression). The previous default ``"\\|"`` was the two-character
	            string backslash + pipe, so pipe-separated lines were never split.

	    Returns:
	        A list with one list of fields per line.
	    """
	    with open(filename, encoding="utf-8") as f:
	        return [line.strip().split(split) for line in f]


	def get_hparams(init=True):
	    """Build hyper-parameters from the command line and a JSON config file.

	    Parses ``-c/--config`` (default ``./configs/base.json``) and the required
	    ``-m/--model`` from the command line. The model directory is
	    ``./logs/<model>`` and is created when missing.

	    Args:
	        init: When True, the config file is read and copied to
	            ``<model_dir>/config.json``. When False, the previously saved
	            ``<model_dir>/config.json`` is read instead.

	    Returns:
	        An ``HParams`` built from the JSON config, with ``model_dir`` set.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "-c",
	        "--config",
	        type=str,
	        default="./configs/base.json",
	        help="JSON file for configuration",
	    )
	    parser.add_argument("-m", "--model", type=str, required=True, help="Model name")

	    args = parser.parse_args()
	    model_dir = os.path.join("./logs", args.model)

	    # exist_ok avoids the check-then-create race of the previous exists() test.
	    os.makedirs(model_dir, exist_ok=True)

	    config_path = args.config
	    config_save_path = os.path.join(model_dir, "config.json")
	    if init:
	        with open(config_path, "r", encoding="utf-8") as f:
	            data = f.read()
	        with open(config_save_path, "w", encoding="utf-8") as f:
	            f.write(data)
	    else:
	        # Was ``vencoding=``, which made open() raise TypeError on this path.
	        with open(config_save_path, "r", encoding="utf-8") as f:
	            data = f.read()
	    config = json.loads(data)
	    hparams = HParams(**config)
	    hparams.model_dir = model_dir
	    return hparams


	def clean_checkpoints(path_to_models="logs/44k/", n_ckpts_to_keep=2, sort_by_time=True):
	    """Freeing up space by deleting saved ckpts

	    Files in ``path_to_models`` whose names start with ``G_``, ``D_``, ``WD_``
	    or ``DUR_`` are considered, one group per prefix; names ending in
	    ``_0.pth`` are never deleted.

	    Arguments:
	    path_to_models -- Path to the model directory
	    n_ckpts_to_keep -- Number of ckpts to keep per prefix, excluding the
	                       ``*_0.pth`` files; 0 deletes every candidate and
	                       negative values are treated as 0
	    sort_by_time -- True -> delete the files with the oldest modification time
	                    False -> delete the files with the smallest step number
	                    in their name
	    """
	    # Accept multi-letter prefixes: the old pattern "._(\d+)\.pth" only matched
	    # one-character prefixes and crashed on DUR_*/WD_* files.
	    step_pattern = re.compile(r"[A-Za-z]+_(\d+)\.pth")

	    ckpts_files = [
	        f
	        for f in os.listdir(path_to_models)
	        if os.path.isfile(os.path.join(path_to_models, f))
	    ]

	    def name_key(_f):
	        found = step_pattern.match(_f)
	        # Names without a step number sort first instead of raising.
	        return int(found.group(1)) if found else -1

	    def time_key(_f):
	        return os.path.getmtime(os.path.join(path_to_models, _f))

	    sort_key = time_key if sort_by_time else name_key
	    n_keep = max(n_ckpts_to_keep, 0)

	    def stale(prefix):
	        ordered = sorted(
	            (
	                f
	                for f in ckpts_files
	                if f.startswith(prefix) and not f.endswith("_0.pth")
	            ),
	            key=sort_key,
	        )
	        # An explicit end index: ``ordered[:-0]`` would be empty and keep everything.
	        return ordered[: max(len(ordered) - n_keep, 0)]

	    to_del = [
	        os.path.join(path_to_models, fn)
	        for prefix in ("G_", "D_", "WD_", "DUR_")
	        for fn in stale(prefix)
	    ]

	    for fn in to_del:
	        os.remove(fn)
	        logger.info(f".. Free up space by deleting ckpt {fn}")


	def get_hparams_from_dir(model_dir):
	    """Load ``<model_dir>/config.json`` into an ``HParams`` and set its ``model_dir``."""
	    with open(os.path.join(model_dir, "config.json"), "r", encoding="utf-8") as fh:
	        settings = json.load(fh)

	    loaded = HParams(**settings)
	    loaded.model_dir = model_dir
	    return loaded


	def get_hparams_from_file(config_path):
	    """Parse the JSON file at ``config_path`` (UTF-8) and wrap it in an ``HParams``."""
	    with open(config_path, "r", encoding="utf-8") as fh:
	        settings = json.load(fh)
	    return HParams(**settings)


	def check_git_hash(model_dir):
	    """Compare the current git commit with the one recorded in ``<model_dir>/githash``.

	    When the directory containing this file has no ``.git`` entry, or the
	    commit cannot be determined, a warning is logged and nothing else happens.
	    If no hash was recorded yet, the current one is written. Otherwise a
	    warning is logged when the recorded and current hashes differ.
	    """
	    source_dir = os.path.dirname(os.path.realpath(__file__))
	    if not os.path.exists(os.path.join(source_dir, ".git")):
	        logger.warning(
	            "{} is not a git repository, therefore hash value comparison will be ignored.".format(
	                source_dir
	            )
	        )
	        return

	    # Run git inside the source tree (not the caller's cwd) and without a shell.
	    # On failure, skip the check rather than storing git's error text as a hash.
	    try:
	        result = subprocess.run(
	            ["git", "rev-parse", "HEAD"],
	            cwd=source_dir,
	            capture_output=True,
	            text=True,
	            check=True,
	        )
	    except (OSError, subprocess.CalledProcessError) as err:
	        logger.warning(
	            "Could not determine the current git hash ({}), therefore hash value comparison will be ignored.".format(
	                err
	            )
	        )
	        return
	    cur_hash = result.stdout.strip()

	    path = os.path.join(model_dir, "githash")
	    if os.path.exists(path):
	        # Context managers close the handles the old open(...).read() leaked.
	        with open(path, encoding="utf-8") as f:
	            saved_hash = f.read().strip()
	        if saved_hash != cur_hash:
	            logger.warning(
	                "git hash values are different. {}(saved) != {}(current)".format(
	                    saved_hash[:8], cur_hash[:8]
	                )
	            )
	    else:
	        with open(path, "w", encoding="utf-8") as f:
	            f.write(cur_hash)


	def get_logger(model_dir, filename="train.log"):
	    """Return a DEBUG-level logger that writes to ``<model_dir>/<filename>``.

	    The logger is named after the last path component of ``model_dir`` and is
	    also bound to the module-level name ``logger``. ``model_dir`` is created
	    when missing. Calling this again for the same log file reuses the handler
	    already attached instead of adding a duplicate.
	    """
	    global logger
	    logger = logging.getLogger(os.path.basename(model_dir))
	    logger.setLevel(logging.DEBUG)

	    os.makedirs(model_dir, exist_ok=True)
	    # FileHandler stores its target as an absolute path in ``baseFilename``.
	    log_path = os.path.abspath(os.path.join(model_dir, filename))
	    already_attached = any(
	        isinstance(handler, logging.FileHandler)
	        and handler.baseFilename == log_path
	        for handler in logger.handlers
	    )
	    if not already_attached:
	        # A second handler for the same file would write every record twice.
	        formatter = logging.Formatter(
	            "%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s"
	        )
	        h = logging.FileHandler(log_path)
	        h.setLevel(logging.DEBUG)
	        h.setFormatter(formatter)
	        logger.addHandler(h)
	    return logger


	class HParams:
	    """Attribute-style container for nested configuration values.

	    Keyword arguments become attributes; dict values (including dict
	    subclasses) are converted recursively into ``HParams``. Item access,
	    ``in``, ``len()``, ``keys()``/``items()``/``values()`` and ``**``
	    unpacking all operate on the instance attributes.
	    """

	    def __init__(self, **kwargs):
	        for k, v in kwargs.items():
	            # isinstance (not ``type(v) == dict``) so dict subclasses nest too.
	            if isinstance(v, dict):
	                v = HParams(**v)
	            self[k] = v

	    def keys(self):
	        return self.__dict__.keys()

	    def items(self):
	        return self.__dict__.items()

	    def values(self):
	        return self.__dict__.values()

	    def __len__(self):
	        return len(self.__dict__)

	    def __getitem__(self, key):
	        return getattr(self, key)

	    def __setitem__(self, key, value):
	        return setattr(self, key, value)

	    def __contains__(self, key):
	        return key in self.__dict__

	    def __repr__(self):
	        return self.__dict__.__repr__()


	def load_model(model_path, config_path):
	    """Build a ``SynthesizerTrn`` from ``config_path``, load ``model_path`` into it, return it.

	    The network is moved to the CPU and put in eval mode; the optimizer state
	    in the checkpoint is skipped.
	    """
	    hps = get_hparams_from_file(config_path)
	    half_filter_plus_one = hps.data.filter_length // 2 + 1
	    segment_in_hops = hps.train.segment_size // hps.data.hop_length
	    # NOTE(review): SynthesizerTrn is not imported anywhere in the visible part
	    # of this file — confirm where it is expected to come from.
	    # 108 is hard-coded where the original had a commented-out ``len(symbols)``.
	    net = SynthesizerTrn(
	        108,
	        half_filter_plus_one,
	        segment_in_hops,
	        n_speakers=hps.data.n_speakers,
	        **hps.model,
	    ).to("cpu")
	    net.eval()
	    load_checkpoint(model_path, net, None, skip_optimizer=True)
	    return net


	def mix_model(
	    network1, network2, output_path, voice_ratio=(0.5, 0.5), tone_ratio=(0.5, 0.5)
	):
	    """Blend the weights of two networks and save the result as a checkpoint.

	    For every key present in both networks the tensors are combined as
	    ``w1 * ratio[0] + w2 * ratio[1]``, using ``tone_ratio`` for keys containing
	    ``"enc_p"`` and ``voice_ratio`` for all others. Keys present in only one
	    network are copied unchanged.

	    Args:
	        network1, network2: Modules to blend. Each one is unwrapped on its own
	            when it has a ``module`` attribute.
	        output_path: Destination passed to ``torch.save``.
	        voice_ratio: Pair of weights for keys without ``"enc_p"``.
	        tone_ratio: Pair of weights for keys containing ``"enc_p"``.

	    The saved dict has ``iteration`` 0, ``optimizer`` None and
	    ``learning_rate`` 0.
	    """

	    def _weights(network):
	        # Decide per network; previously network2 was unwrapped (or not)
	        # according to network1's wrapping.
	        unwrapped = network.module if hasattr(network, "module") else network
	        return unwrapped.state_dict()

	    state_dict1 = _weights(network1)
	    state_dict2 = _weights(network2)

	    for k in state_dict1:
	        if k not in state_dict2:
	            continue
	        ratio = tone_ratio if "enc_p" in k else voice_ratio
	        # The arithmetic creates new tensors, so the live parameters are untouched.
	        state_dict1[k] = state_dict1[k] * ratio[0] + state_dict2[k] * ratio[1]
	    for k in state_dict2:
	        if k not in state_dict1:
	            state_dict1[k] = state_dict2[k].clone()
	    torch.save(
	        {"model": state_dict1, "iteration": 0, "optimizer": None, "learning_rate": 0},
	        output_path,
	    )


	def get_steps(model_path):
	    """Return the last run of digits in ``model_path`` as a string, or None if there is none."""
	    digit_runs = re.findall(r"\d+", model_path)
	    if not digit_runs:
	        return None
	    return digit_runs[-1]