Commit 2914730 by dangtr0408
1 Parent(s): 0b61e28

Update config

Files changed (1):
  1. Models/config.yaml +14 -5
Models/config.yaml CHANGED
@@ -1,18 +1,26 @@
  log_dir: ./Models/Finetune
  save_freq: 1
- log_interval: 5
+ log_interval: 10
  device: cuda
  epochs: 50
  batch_size: 2
  max_len: 310 # maximum number of frames
  pretrained_model: ./Models/Finetune/base_model.pth
  load_only_params: false # set to true if you do not want to load epoch numbers and optimizer parameters
+ debug: true

  data_params:
    train_data: ../../Data_Speech/LibriTTS/train.txt
    val_data: ../../Data_Speech/LibriTTS/val.txt
    root_path: ../../Data_Speech/

+ symbol: # Total 178 symbols
+   pad: "$"
+   punctuation: ';:,.!?¡¿—…"«»“” '
+   letters: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+   letters_ipa: "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+   extend: "" # ADD MORE SYMBOLS HERE
+
  preprocess_params:
    sr: 24000
    spect_params:
@@ -20,23 +28,24 @@ preprocess_params:
    win_length: 1200
    hop_length: 300

+ training_strats:
+   # All modules: 'decoder', 'predictor', 'text_encoder', 'style_encoder', 'text_aligner', 'pitch_extractor', 'mpd', 'msd'
+   freeze_modules: [''] # Not updated during training.
+   ignore_modules: [''] # Not loaded => fresh start. IMPORTANT: 'text_aligner' and 'pitch_extractor' are pretrained utility models; DO NOT ignore them.
+
  model_params:
    dim_in: 64
    hidden_dim: 512
    max_conv_dim: 512
    n_layer: 3
    n_mels: 80
-
-   n_token: 178 # number of phoneme tokens
    max_dur: 50 # maximum duration of a single phoneme
    style_dim: 128 # style vector size
-
    dropout: 0.2

  ASR_params:
    input_dim: 80
    hidden_dim: 256
-   n_token: 178 # number of phoneme tokens
    n_layers: 6
    token_embedding_dim: 512
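
For orientation: assuming max_len counts mel frames at the configured hop_length, the longest training segment is about 310 × 300 / 24000 ≈ 3.9 seconds of audio.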
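
The commit removes the hard-coded n_token: 178 entries and adds an explicit symbol section, so the token inventory can be derived from the config itself. Below is a minimal sketch of how that derivation could look; the field names come from the config, but the build_symbol_dict helper, the symbol ordering, and computing n_token from the list length are assumptions, not the repository's actual code.

import yaml

def build_symbol_dict(config_path="Models/config.yaml"):
    # Hypothetical helper: derive the token inventory from the `symbol` section
    # instead of relying on a hard-coded n_token.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    sym = config["symbol"]
    # Assumed ordering: pad first, then punctuation, letters, IPA letters, extensions.
    symbols = ([sym["pad"]]
               + list(sym["punctuation"])
               + list(sym["letters"])
               + list(sym["letters_ipa"])
               + list(sym.get("extend") or ""))

    symbol_to_id = {s: i for i, s in enumerate(symbols)}
    return symbol_to_id, len(symbols)

if __name__ == "__main__":
    symbol_to_id, n_token = build_symbol_dict()
    print("n_token =", n_token)  # the config comment says the default set totals 178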
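
The new training_strats section names the sub-modules that can be frozen or left unloaded. A plausible way for a training script to consume it is sketched below; the nets dict, the state_dicts argument, and the apply_training_strats helper are hypothetical, only the module names and the two config keys come from the commit.

from torch import nn

def apply_training_strats(nets: dict[str, nn.Module],
                          state_dicts: dict[str, dict],
                          freeze_modules: list[str],
                          ignore_modules: list[str]) -> None:
    # Hypothetical helper: apply the `training_strats` section to a dict of named
    # sub-networks, e.g. {'decoder': ..., 'predictor': ..., 'text_encoder': ...}.
    for name, net in nets.items():
        # ignore_modules: skip loading pretrained weights, so the module starts fresh.
        if name in state_dicts and name not in ignore_modules:
            net.load_state_dict(state_dicts[name], strict=False)
        # freeze_modules: keep the weights but exclude them from gradient updates.
        if name in freeze_modules:
            for p in net.parameters():
                p.requires_grad = False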