Upload hyperparams.yaml
hyperparams.yaml  CHANGED  (+27 -8)
@@ -1,4 +1,4 @@
-# Generated 2024-06-
+# Generated 2024-06-04 from:
 # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
 # yamllint disable
 # ################################
@@ -11,6 +11,7 @@ seed: 1986
 __set_seed: !apply:torch.manual_seed [1986]
 output_folder: results/xvect_augment/1986
 save_folder: results/xvect_augment/1986/save
+pretrained_path: Definite/hwaja_insic
 train_log: results/xvect_augment/1986/train_log.txt

 # Data for augmentation
@@ -41,7 +42,7 @@ skip_prep: true
 ckpt_interval_minutes: 15 # save checkpoint every N min

 # Training parameters
-number_of_epochs:
+number_of_epochs: 1
 batch_size: 16
 lr: 0.001
 lr_final: 0.0001
@@ -61,11 +62,11 @@ deltas: false
 out_n_neurons: 1349 #1211 for vox1 # 5994 for vox2, 7205 for vox1+vox2
 emb_dim: 512

-num_workers:
+num_workers: 4
 dataloader_options:
     batch_size: 16
     shuffle: true
-    num_workers:
+    num_workers: 4

 # Functions
 compute_features: &id005 !new:speechbrain.lobes.features.Fbank
@@ -91,7 +92,7 @@ classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
     out_neurons: 1349

 epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
-    limit:
+    limit: 1

 ############################## Augmentations ###################################

@@ -111,7 +112,7 @@ add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
     snr_high: 15
     noise_sample_rate: 16000
     clean_sample_rate: 16000
-    num_workers:
+    num_workers: 4

 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -126,7 +127,7 @@ add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
     csv_file: results/xvect_augment/1986/save/rir.csv
     reverb_sample_rate: 16000
     clean_sample_rate: 16000
-    num_workers:
+    num_workers: 4

 # Frequency drop: randomly drops a number of frequency bands to zero.
 drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
@@ -158,6 +159,12 @@ mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
     norm_type: sentence
     std_norm: false

+mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
+    norm_type: global
+    std_norm: False
+
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
 modules:
     compute_features: *id005
     embedding_model: *id006
@@ -173,7 +180,7 @@ opt_class: !name:torch.optim.Adam
 lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
     initial_value: 0.001
     final_value: 0.0001
-    epoch_count:
+    epoch_count: 1

 # Logging + checkpoints
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
@@ -190,3 +197,15 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         classifier: *id007
         normalizer: *id008
         counter: *id009
+
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        embedding_model: *id006
+        mean_var_norm: *id008
+        classifier: *id007
+        label_encoder: !ref <label_encoder>
+    paths:
+        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+        mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt