ahnafsamin committed on
Commit
3d2a517
1 Parent(s): 2e18ae5

Upload 2 files

Browse files
config.yaml ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_train_fastspeech2_raw_char_tacotron
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 1000
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 8
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param: []
65
+ ignore_init_mismatch: false
66
+ freeze_param: []
67
+ num_iters_per_epoch: 800
68
+ batch_size: 20
69
+ valid_batch_size: null
70
+ batch_bins: 3000000
71
+ valid_batch_bins: null
72
+ train_shape_file:
73
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/text_shape.char
74
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/speech_shape
75
+ valid_shape_file:
76
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/text_shape.char
77
+ - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/speech_shape
78
+ batch_type: numel
79
+ valid_batch_type: null
80
+ fold_length:
81
+ - 150
82
+ - 204800
83
+ sort_in_batch: descending
84
+ sort_batch: descending
85
+ multiple_iterator: false
86
+ chunk_length: 500
87
+ chunk_shift_ratio: 0.5
88
+ num_cache_chunks: 1024
89
+ train_data_path_and_name_and_type:
90
+ - - dump/raw/tr_no_dev/text
91
+ - text
92
+ - text
93
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/tr_no_dev/durations
94
+ - durations
95
+ - text_int
96
+ - - dump/raw/tr_no_dev/wav.scp
97
+ - speech
98
+ - sound
99
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/pitch.scp
100
+ - pitch
101
+ - npy
102
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/collect_feats/energy.scp
103
+ - energy
104
+ - npy
105
+ valid_data_path_and_name_and_type:
106
+ - - dump/raw/dev/text
107
+ - text
108
+ - text
109
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/dev/durations
110
+ - durations
111
+ - text_int
112
+ - - dump/raw/dev/wav.scp
113
+ - speech
114
+ - sound
115
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/pitch.scp
116
+ - pitch
117
+ - npy
118
+ - - exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/valid/collect_feats/energy.scp
119
+ - energy
120
+ - npy
121
+ allow_variable_data_keys: false
122
+ max_cache_size: 0.0
123
+ max_cache_fd: 32
124
+ valid_max_cache_size: null
125
+ optim: adam
126
+ optim_conf:
127
+ lr: 1.0
128
+ scheduler: noamlr
129
+ scheduler_conf:
130
+ model_size: 384
131
+ warmup_steps: 4000
132
+ token_list:
133
+ - <blank>
134
+ - <unk>
135
+ - <space>
136
+ - E
137
+ - N
138
+ - A
139
+ - O
140
+ - I
141
+ - T
142
+ - R
143
+ - D
144
+ - S
145
+ - K
146
+ - L
147
+ - M
148
+ - U
149
+ - G
150
+ - H
151
+ - W
152
+ - V
153
+ - Z
154
+ - B
155
+ - P
156
+ - J
157
+ - C
158
+ - F
159
+ - ''''
160
+ - Y
161
+ - X
162
+ - Q
163
+ - <sos/eos>
164
+ odim: null
165
+ model_conf: {}
166
+ use_preprocessor: true
167
+ token_type: char
168
+ bpemodel: null
169
+ non_linguistic_symbols: null
170
+ cleaner: tacotron
171
+ g2p: g2p_en
172
+ feats_extract: fbank
173
+ feats_extract_conf:
174
+ n_fft: 1024
175
+ hop_length: 256
176
+ win_length: null
177
+ fs: 22050
178
+ fmin: 80
179
+ fmax: 7600
180
+ n_mels: 80
181
+ normalize: global_mvn
182
+ normalize_conf:
183
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/feats_stats.npz
184
+ tts: fastspeech2
185
+ tts_conf:
186
+ adim: 384
187
+ aheads: 2
188
+ elayers: 4
189
+ eunits: 1536
190
+ dlayers: 4
191
+ dunits: 1536
192
+ positionwise_layer_type: conv1d
193
+ positionwise_conv_kernel_size: 3
194
+ duration_predictor_layers: 2
195
+ duration_predictor_chans: 256
196
+ duration_predictor_kernel_size: 3
197
+ postnet_layers: 5
198
+ postnet_filts: 5
199
+ postnet_chans: 256
200
+ use_masking: true
201
+ use_scaled_pos_enc: true
202
+ encoder_normalize_before: true
203
+ decoder_normalize_before: true
204
+ reduction_factor: 1
205
+ init_type: xavier_uniform
206
+ init_enc_alpha: 1.0
207
+ init_dec_alpha: 1.0
208
+ transformer_enc_dropout_rate: 0.2
209
+ transformer_enc_positional_dropout_rate: 0.2
210
+ transformer_enc_attn_dropout_rate: 0.2
211
+ transformer_dec_dropout_rate: 0.2
212
+ transformer_dec_positional_dropout_rate: 0.2
213
+ transformer_dec_attn_dropout_rate: 0.2
214
+ pitch_predictor_layers: 5
215
+ pitch_predictor_chans: 256
216
+ pitch_predictor_kernel_size: 5
217
+ pitch_predictor_dropout: 0.5
218
+ pitch_embed_kernel_size: 1
219
+ pitch_embed_dropout: 0.0
220
+ stop_gradient_from_pitch_predictor: true
221
+ energy_predictor_layers: 2
222
+ energy_predictor_chans: 256
223
+ energy_predictor_kernel_size: 3
224
+ energy_predictor_dropout: 0.5
225
+ energy_embed_kernel_size: 1
226
+ energy_embed_dropout: 0.0
227
+ stop_gradient_from_energy_predictor: false
228
+ pitch_extract: dio
229
+ pitch_extract_conf:
230
+ fs: 22050
231
+ n_fft: 1024
232
+ hop_length: 256
233
+ f0max: 400
234
+ f0min: 80
235
+ reduction_factor: 1
236
+ pitch_normalize: global_mvn
237
+ pitch_normalize_conf:
238
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/pitch_stats.npz
239
+ energy_extract: energy
240
+ energy_extract_conf:
241
+ fs: 22050
242
+ n_fft: 1024
243
+ hop_length: 256
244
+ win_length: null
245
+ reduction_factor: 1
246
+ energy_normalize: global_mvn
247
+ energy_normalize_conf:
248
+ stats_file: exp/tts_train_raw_char_tacotron/decode_use_teacher_forcingtrue_train.loss.ave/stats/train/energy_stats.npz
249
+ required:
250
+ - output_dir
251
+ - token_list
252
+ version: 0.10.6
253
+ distributed: false
tts_train_fastspeech2_raw_char_tacotron_train.loss.ave.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8196ba5c604df8bd321555d96724c2bd6ae7f1eab3851080ecf0e994ef6c0b1e
3
+ size 149185016