---
# Model / inference configuration.
#
# NOTE(review): this file was reconstructed from a copy whose line breaks and
# indentation had been collapsed. Nesting was rebuilt from key order and from
# keys that would otherwise be duplicated inside one mapping. Placements that
# could not be derived that way are marked "confirm" below.

# Text tokenizer and audio feature-extraction settings.
dataset:
  bpe_model: bpe.model
  sample_rate: 24000
  squeeze: false
  # Mel-spectrogram parameters; sample_rate here repeats dataset.sample_rate.
  mel:
    sample_rate: 24000
    n_fft: 1024
    hop_length: 256
    win_length: 1024
    n_mels: 100
    mel_fmin: 0
    normalize: false

# GPT model hyper-parameters.
gpt:
  model_dim: 1280
  max_mel_tokens: 1815
  max_text_tokens: 600
  heads: 20
  use_mel_codes_as_input: true
  mel_length_compression: 1024
  layers: 24
  number_text_tokens: 12000
  # 8194 = 8192 + 2; presumably the 8192 codebook entries plus the start/stop
  # tokens below (ids 8192 and 8193) - confirm.
  number_mel_codes: 8194
  start_mel_token: 8192
  stop_mel_token: 8193
  start_text_token: 0
  stop_text_token: 1
  train_solo_embeddings: false
  condition_type: "conformer_perceiver"
  condition_module:
    output_size: 512
    linear_units: 2048
    attention_heads: 8
    num_blocks: 6
    input_layer: "conv2d2"
    perceiver_mult: 2
  # Same keys as condition_module, with smaller linear_units / heads / blocks.
  emo_condition_module:
    output_size: 512
    linear_units: 1024
    attention_heads: 4
    num_blocks: 4
    input_layer: "conv2d2"
    perceiver_mult: 2

semantic_codec:
  codebook_size: 8192
  hidden_size: 1024
  codebook_dim: 8
  vocos_dim: 384
  vocos_intermediate_dim: 2048
  vocos_num_layers: 12

s2mel:
  preprocess_params:
    sr: 22050
    spect_params:
      n_fft: 1024
      win_length: 1024
      hop_length: 256
      n_mels: 80
      fmin: 0
      # Deliberately the quoted string "None", not a YAML null; kept as-is
      # because the consumer presumably compares against this string - confirm
      # before changing.
      fmax: "None"

  dit_type: "DiT"
  reg_loss_type: "l1"
  style_encoder:
    dim: 192
  length_regulator:
    channels: 512
    is_discrete: false
    in_channels: 1024
    content_codebook_size: 2048
    sampling_ratios: [1, 1, 1, 1]
    vector_quantize: false
    n_codebooks: 1
    quantizer_dropout: 0.0
    f0_condition: false
    n_f0_bins: 512
  DiT:
    hidden_dim: 512
    num_heads: 8
    depth: 13
    class_dropout_prob: 0.1
    block_size: 8192
    # Equals spect_params.n_mels (80) above.
    in_channels: 80
    style_condition: true
    final_layer_type: 'wavenet'
    target: 'mel'
    content_dim: 512
    content_codebook_size: 1024
    content_type: 'discrete'
    f0_condition: false
    n_f0_bins: 512
    content_codebooks: 1
    is_causal: false
    long_skip_connection: true
    zero_prompt_speech_token: false
    time_as_token: false
    style_as_token: false
    uvit_skip_connection: true
    add_resblock_in_transformer: false
  # NOTE(review): placed as a sibling of DiT under s2mel; it must be its own
  # mapping (hidden_dim / style_condition would otherwise duplicate DiT keys),
  # but the exact parent is inferred - confirm against the loader.
  wavenet:
    hidden_dim: 512
    num_layers: 8
    kernel_size: 5
    dilation_rate: 1
    p_dropout: 0.2
    style_condition: true

# Checkpoint and auxiliary file paths (relative; base directory is decided by
# the consumer).
gpt_checkpoint: gpt.pth
w2v_stat: wav2vec2bert_stats.pt
s2mel_checkpoint: s2mel.pth
emo_matrix: feat2.pt
spk_matrix: feat1.pt
emo_num: [3, 17, 2, 8, 4, 5, 10, 24]
qwen_emo_path: qwen0.6bemo4-merge/

vocoder:
  type: "bigvgan"
  # Name suggests 22 kHz / 80 bands / 256x hop, in line with
  # s2mel.preprocess_params (sr 22050, n_mels 80, hop_length 256) - confirm.
  name: "nvidia/bigvgan_v2_22khz_80band_256x"

# NOTE(review): unquoted, so this loads as the float 2.0 rather than the
# string "2.0". Left unchanged to keep the value type the consumer already
# receives; assumed to be a top-level key rather than vocoder.version -
# confirm.
version: 2.0