Upload 10 files

Browse files

Files changed (8) hide show

CKPT.yaml +2 -2
brain.ckpt +1 -1
classifier.ckpt +1 -1
counter.ckpt +1 -1
embedding_model.ckpt +1 -1
hyperparams.yaml +8 -27
normalizer.ckpt +1 -1
optimizer.ckpt +1 -1

CKPT.yaml CHANGED Viewed

@@ -1,4 +1,4 @@
 # yamllint disable
-ErrorRate: 0.3987189829349518
 end-of-epoch: true
-unixtime: 1717514247.4720354

 # yamllint disable
+ErrorRate: 0.04531625285744667
 end-of-epoch: true
+unixtime: 1717664391.509517

brain.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:847c6ac3bf30588a0216e35313ad9505053ca7e62871c1160c33d532b5c8815f
 size 50

 version https://git-lfs.github.com/spec/v1
+oid sha256:e49e946f9de64ad4fcaac843a9a9e7712219f2797cd4afe46cbf28c17c68c7b6
 size 50

classifier.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d292d66608240df48e5d2f530c94544a6fec4c02d7337aec2a2ef54b26c9337a
 size 3840482

 version https://git-lfs.github.com/spec/v1
+oid sha256:19747c44e51418be40e88ac862d700f3d4f4751716f137711f93b83e5e92a788
 size 3840482

counter.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b
 size 1

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d
 size 1

embedding_model.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f039c9c9eb7c802f53f434c3e2a5ff6e6a2450fb878518ef5a2b94ccda65e47
 size 16887535

 version https://git-lfs.github.com/spec/v1
+oid sha256:47fda3e15d508365300a56bd22aefcc199af83ec18ab9a5a23565aef2e93efed
 size 16887535

hyperparams.yaml CHANGED Viewed

@@ -1,4 +1,4 @@
-# Generated 2024-06-04 from:
 # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
 # yamllint disable
 # ################################
@@ -11,7 +11,6 @@ seed: 1986
 __set_seed: !apply:torch.manual_seed [1986]
 output_folder: results/xvect_augment/1986
 save_folder: results/xvect_augment/1986/save
-pretrained_path: Definite/hwaja_insic
 train_log: results/xvect_augment/1986/train_log.txt
 # Data for augmentation
@@ -42,7 +41,7 @@ skip_prep: true
 ckpt_interval_minutes: 15 # save checkpoint every N min
 # Training parameters
-number_of_epochs: 1
 batch_size: 16
 lr: 0.001
 lr_final: 0.0001
@@ -62,11 +61,11 @@ deltas: false
 out_n_neurons: 1349 #1211 for vox1  # 5994 for vox2, 7205 for vox1+vox2
 emb_dim: 512
-num_workers: 4
 dataloader_options:
   batch_size: 16
   shuffle: true
-  num_workers: 4
 # Functions
 compute_features: &id005 !new:speechbrain.lobes.features.Fbank
@@ -92,7 +91,7 @@ classifier: &id007 !new:speechbrain.lobes.models.Xvector.Classifier
   out_neurons: 1349
 epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
-  limit: 1
 ############################## Augmentations ###################################
@@ -112,7 +111,7 @@ add_noise: &id001 !new:speechbrain.augment.time_domain.AddNoise
   snr_high: 15
   noise_sample_rate: 16000
   clean_sample_rate: 16000
-  num_workers: 4
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
@@ -127,7 +126,7 @@ add_reverb: &id002 !new:speechbrain.augment.time_domain.AddReverb
   csv_file: results/xvect_augment/1986/save/rir.csv
   reverb_sample_rate: 16000
   clean_sample_rate: 16000
-  num_workers: 4
 # Frequency drop: randomly drops a number of frequency bands to zero.
 drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
@@ -159,12 +158,6 @@ mean_var_norm: &id008 !new:speechbrain.processing.features.InputNormalization
   norm_type: sentence
   std_norm: false
-mean_var_norm_emb: !new:speechbrain.processing.features.InputNormalization
-    norm_type: global
-    std_norm: False
-label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
 modules:
   compute_features: *id005
   embedding_model: *id006
@@ -180,7 +173,7 @@ opt_class: !name:torch.optim.Adam
 lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
   initial_value: 0.001
   final_value: 0.0001
-  epoch_count: 1
 # Logging + checkpoints
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
@@ -197,15 +190,3 @@ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
     classifier: *id007
     normalizer: *id008
     counter: *id009
-pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
-    loadables:
-        embedding_model: *id006
-        mean_var_norm: *id008
-        classifier: *id007
-        label_encoder: !ref <label_encoder>
-    paths:
-        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
-        mean_var_norm: !ref <pretrained_path>/normalizer.ckpt
-        classifier: !ref <pretrained_path>/classifier.ckpt
-        label_encoder: !ref <pretrained_path>/label_encoder.txt

+# Generated 2024-06-06 from:
 # /content/speechbrain/recipes/VoxCeleb/SpeakerRec/hparams/train_x_vectors.yaml
 # yamllint disable
 # ################################
 __set_seed: !apply:torch.manual_seed [1986]
 output_folder: results/xvect_augment/1986
 save_folder: results/xvect_augment/1986/save
 train_log: results/xvect_augment/1986/train_log.txt
 # Data for augmentation
 ckpt_interval_minutes: 15 # save checkpoint every N min
 # Training parameters
+number_of_epochs: 5
 batch_size: 16
 lr: 0.001
 lr_final: 0.0001
 out_n_neurons: 1349 #1211 for vox1  # 5994 for vox2, 7205 for vox1+vox2
 emb_dim: 512
+num_workers: 2
 dataloader_options:
   batch_size: 16
   shuffle: true
+  num_workers: 2
 # Functions
 compute_features: &id005 !new:speechbrain.lobes.features.Fbank
   out_neurons: 1349
 epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
+  limit: 5
 ############################## Augmentations ###################################
   snr_high: 15
   noise_sample_rate: 16000
   clean_sample_rate: 16000
+  num_workers: 2
 # Download and prepare the dataset of room impulse responses for augmentation
 prepare_rir_data: !name:speechbrain.augment.preparation.prepare_dataset_from_URL
   csv_file: results/xvect_augment/1986/save/rir.csv
   reverb_sample_rate: 16000
   clean_sample_rate: 16000
+  num_workers: 2
 # Frequency drop: randomly drops a number of frequency bands to zero.
 drop_freq: &id003 !new:speechbrain.augment.time_domain.DropFreq
   norm_type: sentence
   std_norm: false
 modules:
   compute_features: *id005
   embedding_model: *id006
 lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
   initial_value: 0.001
   final_value: 0.0001
+  epoch_count: 5
 # Logging + checkpoints
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     classifier: *id007
     normalizer: *id008
     counter: *id009

normalizer.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92244ada292c7d670d1dc88549e74ed24b3e25e70f27fe443420cf4832d6811b
 size 1578

 version https://git-lfs.github.com/spec/v1
+oid sha256:b98d47768bfba78eaa8a052f7f3e864308f5fff7e34051c8cb2adfef9f451948
 size 1578

optimizer.ckpt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50c29db152d0f54bec401f30610ed2f1b8039829a334331a4840b8d43546e5c7
 size 41371844

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ff8086bb599afadf5aa6bb4f2149adbd574cd7c91974b563a4a8e223bc3754d
 size 41371844