jungjee committed on
Commit
1cb6c7e
1 Parent(s): 982f418

Update model

Browse files
Files changed (23) hide show
  1. README.md +15 -15
  2. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/8epoch.pth +3 -0
  3. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/RESULTS.md +17 -0
  4. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/config.yaml +199 -0
  5. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/backward_time.png +0 -0
  6. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/clip.png +0 -0
  7. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/eer.png +0 -0
  8. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/forward_time.png +0 -0
  9. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/gpu_max_cached_mem_GB.png +0 -0
  10. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/grad_norm.png +0 -0
  11. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/iter_time.png +0 -0
  12. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/loss.png +0 -0
  13. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/loss_scale.png +0 -0
  14. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/mindcf.png +0 -0
  15. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/n_trials.png +0 -0
  16. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/nontrg_mean.png +0 -0
  17. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/nontrg_std.png +0 -0
  18. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/optim0_lr0.png +0 -0
  19. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/optim_step_time.png +0 -0
  20. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/train_time.png +0 -0
  21. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/trg_mean.png +0 -0
  22. exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/trg_std.png +0 -0
  23. meta.yaml +3 -3
README.md CHANGED
@@ -11,7 +11,7 @@ license: cc-by-4.0
11
 
12
  ## ESPnet2 SPK model
13
 
14
- ### `espnet/voxcelebs12_ecapa`
15
 
16
  This model was trained by Jungjee using voxceleb recipe in [espnet](https://github.com/espnet/espnet/).
17
 
@@ -22,16 +22,16 @@ if you haven't done that already.
22
 
23
  ```bash
24
  cd espnet
25
- git checkout 53b99bf9bc043c4444b760f1d53ab719c77cb386
26
  pip install -e .
27
  cd egs2/voxceleb/spk1
28
- ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_ecapa
29
  ```
30
 
31
  <!-- Generated by scripts/utils/show_spk_result.py -->
32
  # RESULTS
33
  ## Environments
34
- date: 2023-11-30 15:56:55.514299
35
 
36
  - python version: 3.9.16 (main, Mar 8 2023, 14:00:05) [GCC 11.2.0]
37
  - espnet version: 202310
@@ -39,26 +39,26 @@ date: 2023-11-30 15:56:55.514299
39
 
40
  | | Mean | Std |
41
  |---|---|---|
42
- | Target | -0.7353 | 0.1212 |
43
- | Non-target | 0.0916 | 0.0916 |
44
 
45
  | Model name | EER(%) | minDCF |
46
  |---|---|---|
47
- | ecapa_wavlm_joint | 0.425 | 0.04015 |
48
 
49
  ## SPK config
50
 
51
  <details><summary>expand</summary>
52
 
53
  ```
54
- config: conf/tuning/train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt9.yaml
55
  print_config: false
56
  log_level: INFO
57
  drop_last_iter: true
58
  dry_run: false
59
  iterator_type: category
60
  valid_iterator_type: sequence
61
- output_dir: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt9_raw_sp
62
  ngpu: 1
63
  seed: 0
64
  num_workers: 8
@@ -69,7 +69,7 @@ dist_world_size: 4
69
  dist_rank: 0
70
  local_rank: 0
71
  dist_master_addr: localhost
72
- dist_master_port: 38103
73
  dist_launcher: null
74
  multiprocessing_distributed: true
75
  unused_parameters: true
@@ -92,7 +92,7 @@ best_model_criterion:
92
  - - valid
93
  - eer
94
  - min
95
- keep_nbest_models: 3
96
  nbest_averaging_interval: 0
97
  grad_clip: 9999
98
  grad_clip_type: 2.0
@@ -121,7 +121,7 @@ init_param:
121
  - save_exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_raw_sp/valid.eer.best.pth
122
  ignore_init_mismatch: false
123
  freeze_param: []
124
- num_iters_per_epoch: 16000
125
  batch_size: 64
126
  valid_batch_size: 5
127
  batch_bins: 1000000
@@ -174,11 +174,11 @@ optim_conf:
174
  amsgrad: false
175
  scheduler: cosineannealingwarmuprestarts
176
  scheduler_conf:
177
- first_cycle_steps: 320000
178
  cycle_mult: 1.0
179
- max_lr: 0.0001
180
  min_lr: 5.0e-06
181
- warmup_steps: 2000
182
  gamma: 0.75
183
  init: null
184
  use_preprocessor: true
 
11
 
12
  ## ESPnet2 SPK model
13
 
14
+ ### `espnet/voxcelebs12_ecapa_wavlm_joint`
15
 
16
  This model was trained by Jungjee using voxceleb recipe in [espnet](https://github.com/espnet/espnet/).
17
 
 
22
 
23
  ```bash
24
  cd espnet
25
+ git checkout e5da124138fc58708fcdda03fd8d4e02fe5d7c65
26
  pip install -e .
27
  cd egs2/voxceleb/spk1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/voxcelebs12_ecapa_wavlm_joint
29
  ```
30
 
31
  <!-- Generated by scripts/utils/show_spk_result.py -->
32
  # RESULTS
33
  ## Environments
34
+ date: 2023-12-05 12:47:46.810012
35
 
36
  - python version: 3.9.16 (main, Mar 8 2023, 14:00:05) [GCC 11.2.0]
37
  - espnet version: 202310
 
39
 
40
  | | Mean | Std |
41
  |---|---|---|
42
+ | Target | -0.7332 | 0.1202 |
43
+ | Non-target | 0.0910 | 0.0910 |
44
 
45
  | Model name | EER(%) | minDCF |
46
  |---|---|---|
47
+ | conf/tuning/train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11 | 0.415 | 0.03813 |
48
 
49
  ## SPK config
50
 
51
  <details><summary>expand</summary>
52
 
53
  ```
54
+ config: conf/tuning/train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11.yaml
55
  print_config: false
56
  log_level: INFO
57
  drop_last_iter: true
58
  dry_run: false
59
  iterator_type: category
60
  valid_iterator_type: sequence
61
+ output_dir: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp
62
  ngpu: 1
63
  seed: 0
64
  num_workers: 8
 
69
  dist_rank: 0
70
  local_rank: 0
71
  dist_master_addr: localhost
72
+ dist_master_port: 57359
73
  dist_launcher: null
74
  multiprocessing_distributed: true
75
  unused_parameters: true
 
92
  - - valid
93
  - eer
94
  - min
95
+ keep_nbest_models: 2
96
  nbest_averaging_interval: 0
97
  grad_clip: 9999
98
  grad_clip_type: 2.0
 
121
  - save_exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_raw_sp/valid.eer.best.pth
122
  ignore_init_mismatch: false
123
  freeze_param: []
124
+ num_iters_per_epoch: 8000
125
  batch_size: 64
126
  valid_batch_size: 5
127
  batch_bins: 1000000
 
174
  amsgrad: false
175
  scheduler: cosineannealingwarmuprestarts
176
  scheduler_conf:
177
+ first_cycle_steps: 10000
178
  cycle_mult: 1.0
179
+ max_lr: 5.0e-05
180
  min_lr: 5.0e-06
181
+ warmup_steps: 1000
182
  gamma: 0.75
183
  init: null
184
  use_preprocessor: true
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/8epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c364606a99dd5f2de03caaf936388d0a437fbd41262828f741cadec600372ab8
3
+ size 1389974254
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/RESULTS.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_spk_result.py -->
2
+ # RESULTS
3
+ ## Environments
4
+ date: 2023-12-05 12:47:46.810012
5
+
6
+ - python version: 3.9.16 (main, Mar 8 2023, 14:00:05) [GCC 11.2.0]
7
+ - espnet version: 202310
8
+ - pytorch version: 2.0.1
9
+
10
+ | | Mean | Std |
11
+ |---|---|---|
12
+ | Target | -0.7332 | 0.1202 |
13
+ | Non-target | 0.0910 | 0.0910 |
14
+
15
+ | Model name | EER(%) | minDCF |
16
+ |---|---|---|
17
+ | conf/tuning/train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11 | 0.415 | 0.03813 |
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/config.yaml ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: true
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: sequence
8
+ output_dir: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp
9
+ ngpu: 1
10
+ seed: 0
11
+ num_workers: 8
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 57359
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: true
26
+ cudnn_deterministic: false
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 20
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - eer
41
+ - min
42
+ keep_nbest_models: 2
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 9999
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 16
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: true
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param:
68
+ - save_exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_raw_sp/valid.eer.best.pth
69
+ ignore_init_mismatch: false
70
+ freeze_param: []
71
+ num_iters_per_epoch: 8000
72
+ batch_size: 64
73
+ valid_batch_size: 5
74
+ batch_bins: 1000000
75
+ valid_batch_bins: null
76
+ train_shape_file:
77
+ - exp/spk_stats_16k_sp/train/speech_shape
78
+ valid_shape_file:
79
+ - exp/spk_stats_16k_sp/valid/speech_shape
80
+ batch_type: folded
81
+ valid_batch_type: null
82
+ fold_length:
83
+ - 120000
84
+ sort_in_batch: descending
85
+ shuffle_within_batch: false
86
+ sort_batch: descending
87
+ multiple_iterator: false
88
+ chunk_length: 500
89
+ chunk_shift_ratio: 0.5
90
+ num_cache_chunks: 1024
91
+ chunk_excluded_key_prefixes: []
92
+ chunk_default_fs: null
93
+ train_data_path_and_name_and_type:
94
+ - - dump/raw/voxceleb12_devs_sp/wav.scp
95
+ - speech
96
+ - sound
97
+ - - dump/raw/voxceleb12_devs_sp/utt2spk
98
+ - spk_labels
99
+ - text
100
+ valid_data_path_and_name_and_type:
101
+ - - dump/raw/voxceleb1_test/trial.scp
102
+ - speech
103
+ - sound
104
+ - - dump/raw/voxceleb1_test/trial2.scp
105
+ - speech2
106
+ - sound
107
+ - - dump/raw/voxceleb1_test/trial_label
108
+ - spk_labels
109
+ - text
110
+ allow_variable_data_keys: false
111
+ max_cache_size: 0.0
112
+ max_cache_fd: 32
113
+ allow_multi_rates: false
114
+ valid_max_cache_size: null
115
+ exclude_weight_decay: false
116
+ exclude_weight_decay_conf: {}
117
+ optim: adam
118
+ optim_conf:
119
+ lr: 0.0001
120
+ weight_decay: 1.0e-05
121
+ amsgrad: false
122
+ scheduler: cosineannealingwarmuprestarts
123
+ scheduler_conf:
124
+ first_cycle_steps: 10000
125
+ cycle_mult: 1.0
126
+ max_lr: 5.0e-05
127
+ min_lr: 5.0e-06
128
+ warmup_steps: 1000
129
+ gamma: 0.75
130
+ init: null
131
+ use_preprocessor: true
132
+ input_size: null
133
+ target_duration: 3.0
134
+ spk2utt: dump/raw/voxceleb12_devs_sp/spk2utt
135
+ spk_num: 21615
136
+ sample_rate: 16000
137
+ num_eval: 10
138
+ rir_scp: ''
139
+ model_conf:
140
+ extract_feats_in_collect_stats: false
141
+ frontend: s3prl
142
+ frontend_conf:
143
+ frontend_conf:
144
+ upstream: wavlm_large
145
+ download_dir: ./hub
146
+ multilayer_feature: true
147
+ specaug: null
148
+ specaug_conf: {}
149
+ normalize: utterance_mvn
150
+ normalize_conf:
151
+ norm_vars: false
152
+ encoder: ecapa_tdnn
153
+ encoder_conf:
154
+ model_scale: 8
155
+ ndim: 1024
156
+ output_size: 1536
157
+ pooling: chn_attn_stat
158
+ pooling_conf: {}
159
+ projector: rawnet3
160
+ projector_conf:
161
+ output_size: 192
162
+ preprocessor: spk
163
+ preprocessor_conf:
164
+ target_duration: 6.0
165
+ sample_rate: 16000
166
+ num_eval: 3
167
+ noise_apply_prob: 0.0
168
+ noise_info:
169
+ - - 1.0
170
+ - dump/raw/musan_speech.scp
171
+ - - 4
172
+ - 7
173
+ - - 13
174
+ - 20
175
+ - - 1.0
176
+ - dump/raw/musan_noise.scp
177
+ - - 1
178
+ - 1
179
+ - - 0
180
+ - 15
181
+ - - 1.0
182
+ - dump/raw/musan_music.scp
183
+ - - 1
184
+ - 1
185
+ - - 5
186
+ - 15
187
+ rir_apply_prob: 0.0
188
+ rir_scp: dump/raw/rirs.scp
189
+ loss: aamsoftmax_sc_topk
190
+ loss_conf:
191
+ margin: 0.5
192
+ scale: 30
193
+ K: 3
194
+ mp: 0.06
195
+ k_top: 5
196
+ required:
197
+ - output_dir
198
+ version: '202310'
199
+ distributed: true
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/backward_time.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/clip.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/eer.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/forward_time.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/grad_norm.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/iter_time.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/loss.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/loss_scale.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/mindcf.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/n_trials.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/nontrg_mean.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/nontrg_std.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/optim0_lr0.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/optim_step_time.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/train_time.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/trg_mean.png ADDED
exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/images/trg_std.png ADDED
meta.yaml CHANGED
@@ -1,8 +1,8 @@
1
  espnet: '202310'
2
  files:
3
- model_file: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt9_raw_sp/3epoch.pth
4
  python: "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]"
5
- timestamp: 1701378221.436117
6
  torch: 2.0.1
7
  yaml_files:
8
- train_config: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt9_raw_sp/config.yaml
 
1
  espnet: '202310'
2
  files:
3
+ model_file: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/8epoch.pth
4
  python: "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]"
5
+ timestamp: 1704236578.76806
6
  torch: 2.0.1
7
  yaml_files:
8
+ train_config: exp/spk_train_ecapa_Vox12_emb192_torchmelspec_subcentertopk_wavlm_jt11_raw_sp/config.yaml