dangtr0408 committed
Commit 82da400 · 1 Parent(s): 1016ef2

Update config

Files changed (1)
  1. Models/config.yaml +19 -10
Models/config.yaml CHANGED
@@ -1,18 +1,25 @@
- log_dir: "Models/Finetune_Extend"
+ log_dir: ./Models/Finetune
  save_freq: 1
- log_interval: 5
- device: "cuda"
+ log_interval: 10
+ device: cuda
  epochs: 50
- batch_size: 3
- max_len: 210 # maximum number of frames
- pretrained_model: "Models/Finetune_Extend/current_model.pth"
+ batch_size: 2
+ max_len: 310 # maximum number of frames
+ pretrained_model: ./Models/Finetune/base_model.pth
  load_only_params: false # set to true if do not want to load epoch numbers and optimizer parameters
+ debug: true

  data_params:
  train_data: "../../Data_Speech/viVoice/train.txt"
  val_data: "../../Data_Speech/combine/combine_val.txt"
  root_path: "../../Data_Speech/"
- min_length: 50 # sample until texts with this size are obtained for OOD texts
+
+ symbol: #Total 189 symbols
+ pad: "$"
+ punctuation: ';:,.!?¡¿—…"«»“” '
+ letters: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ letters_ipa: "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+ extend: "∫̆ăη͡123456" #ADD MORE SYMBOLS HERE

  preprocess_params:
  sr: 24000
@@ -21,14 +28,17 @@ preprocess_params:
  win_length: 1200
  hop_length: 300

+ training_strats:
+ #All modules: 'decoder', 'predictor', 'text_encoder', 'style_encoder', 'text_aligner', 'pitch_extractor', 'mpd', 'msd'
+ freeze_modules: [''] # Not updated when training.
+ ignore_modules: [''] # Not loading => fresh start. IMPORTANT: 'text_aligner' and 'pitch_extractor' are util pretraineds DO NOT ignore them.
+
  model_params:
  dim_in: 64
  hidden_dim: 512
  max_conv_dim: 512
  n_layer: 3
  n_mels: 80
-
- n_token: 189 # number of phoneme tokens
  max_dur: 50 # maximum duration of a single phoneme
  style_dim: 128 # style vector size

@@ -37,7 +47,6 @@ model_params:
  ASR_params:
  input_dim: 80
  hidden_dim: 256
- n_token: 189 # number of phoneme tokens
  n_layers: 6
  token_embedding_dim: 512
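The new symbol: block replaces the n_token: 189 entries that were hard-coded under model_params and ASR_params. Below is a minimal sketch, not part of this commit, of how that vocabulary size could be derived from the config, assuming pad, punctuation, letters, letters_ipa and extend are nested under symbol and that tokens are counted per Unicode code point (so combining marks such as the breve in extend count as separate symbols):

import yaml

# Hypothetical sketch (not part of this commit): count the token vocabulary defined by the
# new symbol: section, assuming it is the pad symbol followed by every character of
# punctuation, letters, letters_ipa and extend, in that order.
with open("Models/config.yaml", encoding="utf-8") as f:
    sym = yaml.safe_load(f)["symbol"]

symbols = ([sym["pad"]] + list(sym["punctuation"]) + list(sym["letters"])
           + list(sym["letters_ipa"]) + list(sym["extend"]))
token_id = {s: i for i, s in enumerate(symbols)}  # symbol -> integer token id

# len(symbols) should line up with the "#Total 189 symbols" comment and with the n_token
# value that this commit removes from model_params and ASR_params.
print(len(symbols), len(token_id))

Deriving n_token from the symbol table this way keeps the "#Total 189 symbols" comment and the model vocabulary in sync when new characters are appended to extend.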
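The new training_strats section names sub-modules to freeze or to leave unloaded. A hypothetical sketch of how a training script might honor these two lists follows; the module dict, names and this helper are assumptions, since the actual trainer is not included in this commit:

from torch import nn

# Hypothetical sketch: apply the training_strats lists to a dict of named sub-modules
# ('decoder', 'predictor', 'text_encoder', ...). Names, dict layout and this helper are
# assumptions; the real training script is not shown in this commit.
def apply_training_strats(model: dict[str, nn.Module], state_dicts: dict, strats: dict) -> None:
    freeze = set(filter(None, strats.get("freeze_modules", [])))  # drop the '' placeholder
    ignore = set(filter(None, strats.get("ignore_modules", [])))

    for name, module in model.items():
        # ignore_modules: skip loading checkpoint weights, so the module starts fresh.
        if name not in ignore and name in state_dicts:
            module.load_state_dict(state_dicts[name])
        # freeze_modules: keep the loaded weights but exclude them from gradient updates.
        if name in freeze:
            for p in module.parameters():
                p.requires_grad = False

Per the comment in the config, 'text_aligner' and 'pitch_extractor' load pretrained utility weights and should not be placed in ignore_modules.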