upload model with spemb
Browse files
- exp/svs_train_visinger2_raw_phn_none_mix/500epoch.pth +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/config.yaml +40 -46
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_backward_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_fake_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_forward_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_optim_step_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_real_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_train_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_adv_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_backward_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_feat_match_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_forward_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_kl_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_am_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_ddsp_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_optim_step_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_phn_dur_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_pitch_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_score_dur_loss.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/generator_train_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/gpu_max_cached_mem_GB.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/iter_time.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/optim0_lr0.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/optim1_lr0.png +2 -2
- exp/svs_train_visinger2_raw_phn_none_mix/images/train_time.png +2 -2
exp/svs_train_visinger2_raw_phn_none_mix/500epoch.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b2deb4f692ee1caf0e82a5c195dc23df5e9157ee7c3a2d81074a6c633fce972
|
3 |
+
size 448413582
|
exp/svs_train_visinger2_raw_phn_none_mix/config.yaml
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
-
config: conf/tuning/
|
2 |
print_config: false
|
3 |
log_level: INFO
|
4 |
drop_last_iter: false
|
5 |
dry_run: false
|
6 |
iterator_type: sequence
|
7 |
valid_iterator_type: null
|
8 |
-
output_dir: exp/
|
9 |
ngpu: 1
|
10 |
seed: 777
|
11 |
-
num_workers:
|
12 |
num_att_plot: 0
|
13 |
dist_backend: nccl
|
14 |
dist_init_method: env://
|
@@ -23,8 +23,6 @@ unused_parameters: true
|
|
23 |
sharded_ddp: false
|
24 |
use_deepspeed: false
|
25 |
deepspeed_config: null
|
26 |
-
gradient_as_bucket_view: true
|
27 |
-
ddp_comm_hook: null
|
28 |
cudnn_enabled: true
|
29 |
cudnn_benchmark: false
|
30 |
cudnn_deterministic: false
|
@@ -80,11 +78,11 @@ batch_bins: 1000000
|
|
80 |
valid_batch_bins: null
|
81 |
category_sample_size: 10
|
82 |
train_shape_file:
|
83 |
-
- exp/
|
84 |
-
- exp/
|
85 |
valid_shape_file:
|
86 |
-
- exp/
|
87 |
-
- exp/
|
88 |
batch_type: sorted
|
89 |
valid_batch_type: null
|
90 |
fold_length:
|
@@ -102,59 +100,53 @@ chunk_default_fs: null
|
|
102 |
chunk_max_abs_length: null
|
103 |
chunk_discard_short_samples: true
|
104 |
train_data_path_and_name_and_type:
|
105 |
-
- -
|
106 |
- text
|
107 |
- text
|
108 |
-
- -
|
109 |
- singing
|
110 |
- sound
|
111 |
-
- -
|
112 |
- label
|
113 |
- duration
|
114 |
-
- -
|
115 |
- score
|
116 |
- score
|
117 |
-
- - exp/
|
118 |
- pitch
|
119 |
- npy
|
120 |
-
- - exp/
|
121 |
- feats
|
122 |
- npy
|
123 |
-
- -
|
124 |
- spembs
|
125 |
- kaldi_ark
|
126 |
-
- -
|
127 |
-
- sids
|
128 |
-
- text_int
|
129 |
-
- - dump_mix/raw/tr_no_dev/utt2lid
|
130 |
- lids
|
131 |
- text_int
|
132 |
valid_data_path_and_name_and_type:
|
133 |
-
- -
|
134 |
- text
|
135 |
- text
|
136 |
-
- -
|
137 |
- singing
|
138 |
- sound
|
139 |
-
- -
|
140 |
- label
|
141 |
- duration
|
142 |
-
- -
|
143 |
- score
|
144 |
- score
|
145 |
-
- - exp/
|
146 |
- pitch
|
147 |
- npy
|
148 |
-
- - exp/
|
149 |
- feats
|
150 |
- npy
|
151 |
-
- -
|
152 |
- spembs
|
153 |
- kaldi_ark
|
154 |
-
- -
|
155 |
-
- sids
|
156 |
-
- text_int
|
157 |
-
- - dump_mix/raw/dev/utt2lid
|
158 |
- lids
|
159 |
- text_int
|
160 |
multi_task_dataset: false
|
@@ -194,7 +186,6 @@ token_list:
|
|
194 |
- <blank>
|
195 |
- <unk>
|
196 |
- SP
|
197 |
-
- AP
|
198 |
- i@zh
|
199 |
- e@zh
|
200 |
- d@zh
|
@@ -210,8 +201,8 @@ token_list:
|
|
210 |
- j@zh
|
211 |
- l@zh
|
212 |
- h@zh
|
213 |
-
- b@zh
|
214 |
- iii@zh
|
|
|
215 |
- zh@zh
|
216 |
- uei@zh
|
217 |
- m@zh
|
@@ -224,9 +215,10 @@ token_list:
|
|
224 |
- an@zh
|
225 |
- en@zh
|
226 |
- iou@zh
|
227 |
-
- t@zh
|
228 |
- ou@zh
|
|
|
229 |
- ao@zh
|
|
|
230 |
- ong@zh
|
231 |
- iang@zh
|
232 |
- ang@zh
|
@@ -237,13 +229,13 @@ token_list:
|
|
237 |
- r@zh
|
238 |
- k@zh
|
239 |
- ch@zh
|
|
|
240 |
- v@zh
|
241 |
- in@zh
|
242 |
-
- a@jp
|
243 |
-
- uan@zh
|
244 |
- o@jp
|
245 |
-
-
|
246 |
- i@jp
|
|
|
247 |
- s@zh
|
248 |
- uang@zh
|
249 |
- ii@zh
|
@@ -253,17 +245,17 @@ token_list:
|
|
253 |
- u@jp
|
254 |
- e@jp
|
255 |
- ia@zh
|
256 |
-
- uen@zh
|
257 |
- k@jp
|
|
|
258 |
- ua@zh
|
259 |
- n@jp
|
260 |
-
- iong@zh
|
261 |
- t@jp
|
262 |
- r@jp
|
263 |
-
-
|
264 |
- m@jp
|
265 |
-
-
|
266 |
- s@jp
|
|
|
267 |
- vn@zh
|
268 |
- w@jp
|
269 |
- d@jp
|
@@ -271,8 +263,8 @@ token_list:
|
|
271 |
- sh@jp
|
272 |
- g@jp
|
273 |
- y@jp
|
274 |
-
- o@zh
|
275 |
- b@jp
|
|
|
276 |
- ts@jp
|
277 |
- h@jp
|
278 |
- cl@jp
|
@@ -302,6 +294,7 @@ non_linguistic_symbols: null
|
|
302 |
cleaner: null
|
303 |
g2p: null
|
304 |
fs: 44100
|
|
|
305 |
postfrontend: null
|
306 |
postfrontend_conf: {}
|
307 |
score_feats_extract: syllable_score_feats
|
@@ -321,14 +314,14 @@ feats_extract_conf:
|
|
321 |
n_mels: 80
|
322 |
normalize: global_mvn
|
323 |
normalize_conf:
|
324 |
-
stats_file: exp/
|
325 |
svs: vits
|
326 |
svs_conf:
|
327 |
generator_type: visinger2
|
328 |
vocoder_generator_type: visinger2
|
329 |
generator_params:
|
330 |
hidden_channels: 192
|
331 |
-
|
332 |
langs: 3
|
333 |
global_channels: 256
|
334 |
segment_size: 20
|
@@ -517,6 +510,7 @@ svs_conf:
|
|
517 |
lambda_kl: 1.0
|
518 |
sampling_rate: 44100
|
519 |
cache_generator_outputs: true
|
|
|
520 |
pitch_extract: dio
|
521 |
pitch_extract_conf:
|
522 |
use_token_averaged_f0: false
|
@@ -528,7 +522,7 @@ pitch_extract_conf:
|
|
528 |
f0min: 80
|
529 |
pitch_normalize: null
|
530 |
pitch_normalize_conf:
|
531 |
-
stats_file: exp/
|
532 |
ying_extract: null
|
533 |
ying_extract_conf: {}
|
534 |
energy_extract: null
|
@@ -538,5 +532,5 @@ energy_normalize_conf: {}
|
|
538 |
required:
|
539 |
- output_dir
|
540 |
- token_list
|
541 |
-
version: '
|
542 |
distributed: false
|
|
|
1 |
+
config: conf/tuning/train_visinger2_44k_spk_embed_lang.yaml
|
2 |
print_config: false
|
3 |
log_level: INFO
|
4 |
drop_last_iter: false
|
5 |
dry_run: false
|
6 |
iterator_type: sequence
|
7 |
valid_iterator_type: null
|
8 |
+
output_dir: exp/svs_train_visinger2_44k_spk_embed_lang_0409
|
9 |
ngpu: 1
|
10 |
seed: 777
|
11 |
+
num_workers: 4
|
12 |
num_att_plot: 0
|
13 |
dist_backend: nccl
|
14 |
dist_init_method: env://
|
|
|
23 |
sharded_ddp: false
|
24 |
use_deepspeed: false
|
25 |
deepspeed_config: null
|
|
|
|
|
26 |
cudnn_enabled: true
|
27 |
cudnn_benchmark: false
|
28 |
cudnn_deterministic: false
|
|
|
78 |
valid_batch_bins: null
|
79 |
category_sample_size: 10
|
80 |
train_shape_file:
|
81 |
+
- exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/text_shape.phn
|
82 |
+
- exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/singing_shape
|
83 |
valid_shape_file:
|
84 |
+
- exp/svs_stats_raw_phn_None_zh_jp_44100Hz/valid/text_shape.phn
|
85 |
+
- exp/svs_stats_raw_phn_None_zh_jp_44100Hz/valid/singing_shape
|
86 |
batch_type: sorted
|
87 |
valid_batch_type: null
|
88 |
fold_length:
|
|
|
100 |
chunk_max_abs_length: null
|
101 |
chunk_discard_short_samples: true
|
102 |
train_data_path_and_name_and_type:
|
103 |
+
- - dump44100/raw/tr_no_dev/text
|
104 |
- text
|
105 |
- text
|
106 |
+
- - dump44100/raw/tr_no_dev/wav.scp
|
107 |
- singing
|
108 |
- sound
|
109 |
+
- - dump44100/raw/tr_no_dev/label
|
110 |
- label
|
111 |
- duration
|
112 |
+
- - dump44100/raw/tr_no_dev/score.scp
|
113 |
- score
|
114 |
- score
|
115 |
+
- - exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/collect_feats/pitch.scp
|
116 |
- pitch
|
117 |
- npy
|
118 |
+
- - exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/collect_feats/feats.scp
|
119 |
- feats
|
120 |
- npy
|
121 |
+
- - dump44100/raw/tr_no_dev/espnet_spk.scp
|
122 |
- spembs
|
123 |
- kaldi_ark
|
124 |
+
- - dump44100/raw/tr_no_dev/utt2lid
|
|
|
|
|
|
|
125 |
- lids
|
126 |
- text_int
|
127 |
valid_data_path_and_name_and_type:
|
128 |
+
- - dump44100/raw/dev/text
|
129 |
- text
|
130 |
- text
|
131 |
+
- - dump44100/raw/dev/wav.scp
|
132 |
- singing
|
133 |
- sound
|
134 |
+
- - dump44100/raw/dev/label
|
135 |
- label
|
136 |
- duration
|
137 |
+
- - dump44100/raw/dev/score.scp
|
138 |
- score
|
139 |
- score
|
140 |
+
- - exp/svs_stats_raw_phn_None_zh_jp_44100Hz/valid/collect_feats/pitch.scp
|
141 |
- pitch
|
142 |
- npy
|
143 |
+
- - exp/svs_stats_raw_phn_None_zh_jp_44100Hz/valid/collect_feats/feats.scp
|
144 |
- feats
|
145 |
- npy
|
146 |
+
- - dump44100/raw/dev/espnet_spk.scp
|
147 |
- spembs
|
148 |
- kaldi_ark
|
149 |
+
- - dump44100/raw/dev/utt2lid
|
|
|
|
|
|
|
150 |
- lids
|
151 |
- text_int
|
152 |
multi_task_dataset: false
|
|
|
186 |
- <blank>
|
187 |
- <unk>
|
188 |
- SP
|
|
|
189 |
- i@zh
|
190 |
- e@zh
|
191 |
- d@zh
|
|
|
201 |
- j@zh
|
202 |
- l@zh
|
203 |
- h@zh
|
|
|
204 |
- iii@zh
|
205 |
+
- b@zh
|
206 |
- zh@zh
|
207 |
- uei@zh
|
208 |
- m@zh
|
|
|
215 |
- an@zh
|
216 |
- en@zh
|
217 |
- iou@zh
|
|
|
218 |
- ou@zh
|
219 |
+
- t@zh
|
220 |
- ao@zh
|
221 |
+
- AP
|
222 |
- ong@zh
|
223 |
- iang@zh
|
224 |
- ang@zh
|
|
|
229 |
- r@zh
|
230 |
- k@zh
|
231 |
- ch@zh
|
232 |
+
- a@jp
|
233 |
- v@zh
|
234 |
- in@zh
|
|
|
|
|
235 |
- o@jp
|
236 |
+
- uan@zh
|
237 |
- i@jp
|
238 |
+
- c@zh
|
239 |
- s@zh
|
240 |
- uang@zh
|
241 |
- ii@zh
|
|
|
245 |
- u@jp
|
246 |
- e@jp
|
247 |
- ia@zh
|
|
|
248 |
- k@jp
|
249 |
+
- uen@zh
|
250 |
- ua@zh
|
251 |
- n@jp
|
|
|
252 |
- t@jp
|
253 |
- r@jp
|
254 |
+
- iong@zh
|
255 |
- m@jp
|
256 |
+
- uai@zh
|
257 |
- s@jp
|
258 |
+
- er@zh
|
259 |
- vn@zh
|
260 |
- w@jp
|
261 |
- d@jp
|
|
|
263 |
- sh@jp
|
264 |
- g@jp
|
265 |
- y@jp
|
|
|
266 |
- b@jp
|
267 |
+
- o@zh
|
268 |
- ts@jp
|
269 |
- h@jp
|
270 |
- cl@jp
|
|
|
294 |
cleaner: null
|
295 |
g2p: null
|
296 |
fs: 44100
|
297 |
+
use_spk_contrastive_loss: null
|
298 |
postfrontend: null
|
299 |
postfrontend_conf: {}
|
300 |
score_feats_extract: syllable_score_feats
|
|
|
314 |
n_mels: 80
|
315 |
normalize: global_mvn
|
316 |
normalize_conf:
|
317 |
+
stats_file: exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/feats_stats.npz
|
318 |
svs: vits
|
319 |
svs_conf:
|
320 |
generator_type: visinger2
|
321 |
vocoder_generator_type: visinger2
|
322 |
generator_params:
|
323 |
hidden_channels: 192
|
324 |
+
spk_embed_dim: 192
|
325 |
langs: 3
|
326 |
global_channels: 256
|
327 |
segment_size: 20
|
|
|
510 |
lambda_kl: 1.0
|
511 |
sampling_rate: 44100
|
512 |
cache_generator_outputs: true
|
513 |
+
device: cuda
|
514 |
pitch_extract: dio
|
515 |
pitch_extract_conf:
|
516 |
use_token_averaged_f0: false
|
|
|
522 |
f0min: 80
|
523 |
pitch_normalize: null
|
524 |
pitch_normalize_conf:
|
525 |
+
stats_file: exp/svs_stats_raw_phn_None_zh_jp_44100Hz/train/pitch_stats.npz
|
526 |
ying_extract: null
|
527 |
ying_extract_conf: {}
|
528 |
energy_extract: null
|
|
|
532 |
required:
|
533 |
- output_dir
|
534 |
- token_list
|
535 |
+
version: '202402'
|
536 |
distributed: false
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_backward_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_fake_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_forward_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_optim_step_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_real_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/discriminator_train_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_adv_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_backward_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_feat_match_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_forward_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_kl_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_am_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_ddsp_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_mel_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_optim_step_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_phn_dur_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_pitch_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_score_dur_loss.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/generator_train_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/gpu_max_cached_mem_GB.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/iter_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/optim0_lr0.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/optim1_lr0.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
exp/svs_train_visinger2_raw_phn_none_mix/images/train_time.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|