muhtasham commited on
Commit
587299e
·
verified ·
1 Parent(s): 9ec6f4a

Upload hyperparams.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +186 -0
hyperparams.yaml ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2025-03-24 from:
2
+ # /workspace/speechbrain/recipes/LJSpeech/TTS/vocoder/hifigan/hparams/finetune_all.yaml
3
+ # yamllint disable
4
+ ###################################
5
+ # Experiment Parameters and setup #
6
+ ###################################
7
+ seed: 1234
8
+ __set_seed: !apply:speechbrain.utils.seed_everything [1234]
9
+ output_folder: ./results/hifi_gan_finetune_all/1234
10
+ save_folder: ./results/hifi_gan_finetune_all/1234/save
11
+ train_log: ./results/hifi_gan_finetune_all/1234/train_log.txt
12
+ progress_sample_path: ./results/hifi_gan_finetune_all/1234/samples
13
+ epochs: 500 # Reduced epochs for finetuning
14
+ keep_checkpoint_interval: 50
15
+ use_tensorboard: true
16
+
17
+ #################################
18
+ # Data files and pre-processing #
19
+ #################################
20
+ data_folder: all_wav_files
21
+ # e.g., /path/to/your/wav/files
22
+ train_json: ./results/hifi_gan_finetune_all/1234/save/train.json
23
+ valid_json: ./results/hifi_gan_finetune_all/1234/save/valid.json
24
+ test_json: ./results/hifi_gan_finetune_all/1234/save/test.json
25
+
26
+ splits: [train, valid]
27
+ split_ratio: [90, 10]
28
+ ################################
29
+ # Audio Parameters #
30
+ ################################
31
+ skip_prep: false
32
+
33
+ segment_size: 8192
34
+ sample_rate: 22050
35
+ hop_length: 256
36
+ win_length: 1024
37
+ n_mel_channels: 80
38
+ n_fft: 1024
39
+ mel_fmin: 0.0
40
+ mel_fmax: 8000
41
+ mel_normalized: false
42
+ power: 1
43
+ norm: slaney
44
+ mel_scale: slaney
45
+ dynamic_range_compression: true
46
+
47
+
48
+ ################################
49
+ # Optimization Hyperparameters #
50
+ ################################
51
+ learning_rate: 0.00005 # Lower learning rate for finetuning
52
+ weight_decay: 0.9999
53
+ adam_b1: 0.8
54
+ adam_b2: 0.99
55
+ batch_size: 32
56
+ num_workers: 8
57
+
58
+ train_dataloader_opts:
59
+ batch_size: 32
60
+ drop_last: false
61
+ num_workers: 8
62
+
63
+ valid_dataloader_opts:
64
+ batch_size: 1
65
+ num_workers: 8
66
+
67
+ test_dataloader_opts:
68
+ batch_size: 1
69
+ num_workers: 8
70
+
71
+ ################################
72
+ # Model Parameters and model #
73
+ ################################
74
+
75
+ # generator params
76
+ in_channels: 80
77
+ out_channels: 1
78
+
79
+ resblock_type: '1'
80
+ resblock_dilation_sizes: &id001 [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
81
+ resblock_kernel_sizes: &id002 [3, 7, 11]
82
+ upsample_kernel_sizes: &id003 [16, 16, 4, 4]
83
+ upsample_initial_channel: 512
84
+ upsample_factors: &id004 [8, 8, 2, 2]
85
+
86
+ inference_padding: 5
87
+ cond_channels: 0
88
+ conv_post_bias: true
89
+
90
+ mel_spectogram: !name:speechbrain.lobes.models.HifiGAN.mel_spectogram
91
+ sample_rate: 22050
92
+ hop_length: 256
93
+ win_length: 1024
94
+ n_fft: 1024
95
+ n_mels: 80
96
+ f_min: 0.0
97
+ f_max: 8000
98
+ power: 1
99
+ normalized: false
100
+ norm: slaney
101
+ mel_scale: slaney
102
+ compression: true
103
+
104
+ generator: &id005 !new:speechbrain.lobes.models.HifiGAN.HifiganGenerator
105
+ in_channels: 80
106
+ out_channels: 1
107
+ resblock_type: '1'
108
+ resblock_dilation_sizes: *id001
109
+ resblock_kernel_sizes: *id002
110
+ upsample_kernel_sizes: *id003
111
+ upsample_initial_channel: 512
112
+ upsample_factors: *id004
113
+ inference_padding: 5
114
+ cond_channels: 0
115
+ conv_post_bias: true
116
+
117
+ discriminator: &id006 !new:speechbrain.lobes.models.HifiGAN.HifiganDiscriminator
118
+
119
+ #generator loss
120
+
121
+ modules:
122
+ generator: *id005
123
+ discriminator: *id006
124
+ stft_loss:
125
+ mseg_loss: &id007 !new:speechbrain.lobes.models.HifiGAN.MSEGLoss
126
+ feat_match_loss: &id008 !new:speechbrain.lobes.models.HifiGAN.MelganFeatureLoss
127
+ l1_spec_loss: &id009 !new:speechbrain.lobes.models.HifiGAN.L1SpecLoss
128
+ sample_rate: 22050
129
+ hop_length: 256
130
+ win_length: 1024
131
+ n_mel_channels: 80
132
+ n_fft: 1024
133
+ n_stft: 513
134
+ mel_fmin: 0.0
135
+ mel_fmax:
136
+ mel_normalized: false
137
+ power: 1
138
+ dynamic_range_compression: true
139
+
140
+ generator_loss: !new:speechbrain.lobes.models.HifiGAN.GeneratorLoss
141
+ stft_loss:
142
+ stft_loss_weight: 0
143
+ mseg_loss: *id007
144
+ mseg_loss_weight: 1
145
+ feat_match_loss: *id008
146
+ feat_match_loss_weight: 10
147
+ l1_spec_loss: *id009
148
+ l1_spec_loss_weight: 45
149
+
150
+ #discriminator loss
151
+ msed_loss: &id010 !new:speechbrain.lobes.models.HifiGAN.MSEDLoss
152
+
153
+ #optimizer
154
+
155
+ discriminator_loss: !new:speechbrain.lobes.models.HifiGAN.DiscriminatorLoss
156
+ msed_loss: *id010
157
+ opt_class_generator: !name:torch.optim.AdamW
158
+ lr: 0.00005
159
+ betas: [0.8, 0.99]
160
+
161
+ opt_class_discriminator: !name:torch.optim.AdamW
162
+ lr: 0.00005
163
+ betas: [0.8, 0.99]
164
+
165
+ sch_class_generator: !name:torch.optim.lr_scheduler.ExponentialLR
166
+ gamma: 0.9999
167
+ last_epoch: -1
168
+
169
+ sch_class_discriminator: !name:torch.optim.lr_scheduler.ExponentialLR
170
+ gamma: 0.9999
171
+ last_epoch: -1
172
+
173
+ #epoch object
174
+ epoch_counter: &id011 !new:speechbrain.utils.epoch_loop.EpochCounter
175
+ limit: 500
176
+
177
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
178
+ save_file: ./results/hifi_gan_finetune_all/1234/train_log.txt
179
+
180
+ #checkpointer
181
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
182
+ checkpoints_dir: ./results/hifi_gan_finetune_all/1234/save
183
+ recoverables:
184
+ generator: *id005
185
+ discriminator: *id006
186
+ counter: *id011