Upload hyperparams.yaml
hyperparams.yaml  CHANGED  (+27 -8)
@@ -1,4 +1,4 @@
-# Generated 2024-06-
+# Generated 2024-06-04 from:
 # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
 # yamllint disable
 # ################################
@@ -11,6 +11,7 @@ seed: 1986
 __set_seed: !apply:torch.manual_seed [1986]
 output_folder: results/xvect_augment/1986
 save_folder: results/xvect_augment/1986/save
+pretrained_path: Definite/hwaja_insic
 train_log: results/xvect_augment/1986/train_log.txt

 # Data for augmentation
@@ -41,7 +42,7 @@ skip_prep: true
 ckpt_interval_minutes: 15 # save checkpoint every N min

 # Training parameters
-number_of_epochs:
+number_of_epochs: 1
 batch_size: 16
 lr: 0.001
 lr_final: 0.0001
@@ -61,11 +62,11 @@ deltas: false
 out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
 emb_dim: 512

-num_workers:
+num_workers: 4
 dataloader_options:
     batch_size: 16
     shuffle: true
-    num_workers:
+    num_workers: 4

 # Functions
 compute_features: &id005 !new:speechbrain.lobes.features.Fbank
@@ -91,7 +92,7 @@ classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
     out_neurons: 1349

 epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
-    limit:
+    limit: 1

 ############################## Augmentations ###################################

@@ -111,7 +112,7 @@ add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
     snr_high: 15
     noise_sample_rate: 16000
     clean_sample_rate: 16000
-    num_workers:
+    num_workers: 4

 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -126,7 +127,7 @@ add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
     csv_file: results/xvect_augment/1986/save/rir.csv
     reverb_sample_rate: 16000
     clean_sample_rate: 16000
-    num_workers:
+    num_workers: 4

 # Frequency drop: randomly drops a number of frequency bands to zero.
 drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
@@ -158,6 +159,12 @@ mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
     norm_type: sentence
     std_norm: false

+mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
+    norm_type: global
+    std_norm: False
+
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
 modules:
     compute_features: *id005
     embedding_model: *id006
@@ -173,7 +180,7 @@ opt_class: !name:torch.optim.Adam
 lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
     initial_value: 0.001
     final_value: 0.0001
-    epoch_count:
+    epoch_count: 1

 # Logging + checkpoints
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
@@ -190,3 +197,15 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         classifier: *id007
         normalizer: *id008
         counter: *id009
+
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        embedding_model: *id006
+        mean_var_norm: *id008
+        classifier: *id007
+        label_encoder: !ref <label_encoder>
+    paths:
+        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+        mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt