Daporte committed · Commit 76c0443 · verified · 1 Parent(s): faceee6

Create quantizer_config.py

Files changed (1)
  1. quantizer_config.py +167 -0
quantizer_config.py ADDED
@@ -0,0 +1,167 @@
from transformers import PretrainedConfig
from typing import List, Optional


class QuantizerConfig(PretrainedConfig):
    """Configuration for a prosody (f0) quantizer: VQ parameters plus the
    encoder/decoder hyperparameters and training settings."""

    model_type = "prosody_quantizer"

    def __init__(
        self,
        # VQ parameters
        l_bins: int = 320,
        emb_width: int = 64,
        mu: float = 0.99,
        levels: int = 1,
        # Encoder parameters (list defaults are None to avoid mutable
        # default arguments; they fall back to [4] / [2] below)
        encoder_input_emb_width: int = 3,
        encoder_output_emb_width: int = 64,
        encoder_levels: int = 1,
        encoder_downs_t: Optional[List[int]] = None,
        encoder_strides_t: Optional[List[int]] = None,
        encoder_width: int = 32,
        encoder_depth: int = 4,
        encoder_m_conv: float = 1.0,
        encoder_dilation_growth_rate: int = 3,
        # Decoder parameters
        decoder_input_emb_width: int = 3,
        decoder_output_emb_width: int = 64,
        decoder_levels: int = 1,
        decoder_downs_t: Optional[List[int]] = None,
        decoder_strides_t: Optional[List[int]] = None,
        decoder_width: int = 32,
        decoder_depth: int = 4,
        decoder_m_conv: float = 1.0,
        decoder_dilation_growth_rate: int = 3,
        # Training parameters
        lambda_commit: float = 0.02,
        f0_normalize: bool = True,
        intensity_normalize: bool = True,
        multispkr: str = "single",
        f0_feats: bool = False,
        f0_median: bool = False,
        # Optional training hyperparameters
        learning_rate: float = 0.0002,
        adam_b1: float = 0.8,
        adam_b2: float = 0.99,
        lr_decay: float = 0.999,
        **kwargs
    ):
        super().__init__(**kwargs)

        # VQ parameters
        self.l_bins = l_bins
        self.emb_width = emb_width
        self.mu = mu
        self.levels = levels

        # Encoder parameters
        self.encoder_input_emb_width = encoder_input_emb_width
        self.encoder_output_emb_width = encoder_output_emb_width
        self.encoder_levels = encoder_levels
        self.encoder_downs_t = encoder_downs_t if encoder_downs_t is not None else [4]
        self.encoder_strides_t = encoder_strides_t if encoder_strides_t is not None else [2]
        self.encoder_width = encoder_width
        self.encoder_depth = encoder_depth
        self.encoder_m_conv = encoder_m_conv
        self.encoder_dilation_growth_rate = encoder_dilation_growth_rate

        # Decoder parameters
        self.decoder_input_emb_width = decoder_input_emb_width
        self.decoder_output_emb_width = decoder_output_emb_width
        self.decoder_levels = decoder_levels
        self.decoder_downs_t = decoder_downs_t if decoder_downs_t is not None else [4]
        self.decoder_strides_t = decoder_strides_t if decoder_strides_t is not None else [2]
        self.decoder_width = decoder_width
        self.decoder_depth = decoder_depth
        self.decoder_m_conv = decoder_m_conv
        self.decoder_dilation_growth_rate = decoder_dilation_growth_rate

        # Training parameters
        self.lambda_commit = lambda_commit
        self.f0_normalize = f0_normalize
        self.intensity_normalize = intensity_normalize
        self.multispkr = multispkr
        self.f0_feats = f0_feats
        self.f0_median = f0_median

        # Training hyperparameters
        self.learning_rate = learning_rate
        self.adam_b1 = adam_b1
        self.adam_b2 = adam_b2
        self.lr_decay = lr_decay

    @property
    def f0_vq_params(self):
        """Keyword arguments for the vector-quantizer bottleneck."""
        return {
            "l_bins": self.l_bins,
            "emb_width": self.emb_width,
            "mu": self.mu,
            "levels": self.levels,
        }

    @property
    def f0_encoder_params(self):
        """Keyword arguments for the f0 encoder."""
        return {
            "input_emb_width": self.encoder_input_emb_width,
            "output_emb_width": self.encoder_output_emb_width,
            "levels": self.encoder_levels,
            "downs_t": self.encoder_downs_t,
            "strides_t": self.encoder_strides_t,
            "width": self.encoder_width,
            "depth": self.encoder_depth,
            "m_conv": self.encoder_m_conv,
            "dilation_growth_rate": self.encoder_dilation_growth_rate,
        }

    @property
    def f0_decoder_params(self):
        """Keyword arguments for the f0 decoder."""
        return {
            "input_emb_width": self.decoder_input_emb_width,
            "output_emb_width": self.decoder_output_emb_width,
            "levels": self.decoder_levels,
            "downs_t": self.decoder_downs_t,
            "strides_t": self.decoder_strides_t,
            "width": self.decoder_width,
            "depth": self.decoder_depth,
            "m_conv": self.decoder_m_conv,
            "dilation_growth_rate": self.decoder_dilation_growth_rate,
        }

    @classmethod
    def from_yaml(cls, yaml_path: str):
        """Load a config from a yaml file."""
        import yaml

        with open(yaml_path, "r") as f:
            config = yaml.safe_load(f)

        # Map the grouped yaml sections onto the flat constructor kwargs
        kwargs = {
            # VQ params
            **config["f0_vq_params"],
            # Encoder params (prefixed to match the constructor signature)
            **{f"encoder_{k}": v for k, v in config["f0_encoder_params"].items()},
            # Decoder params
            **{f"decoder_{k}": v for k, v in config["f0_decoder_params"].items()},
            # Training params
            "lambda_commit": config.get("lambda_commit", 0.02),
            "f0_normalize": config.get("f0_normalize", True),
            "intensity_normalize": config.get("intensity_normalize", True),
            "multispkr": config.get("multispkr", "single"),
            "f0_feats": config.get("f0_feats", False),
            "f0_median": config.get("f0_median", False),
            # Training hyperparams
            "learning_rate": config.get("learning_rate", 0.0002),
            "adam_b1": config.get("adam_b1", 0.8),
            "adam_b2": config.get("adam_b2", 0.99),
            "lr_decay": config.get("lr_decay", 0.999),
        }

        return cls(**kwargs)
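
For reference, a minimal usage sketch (not part of the commit): it writes a yaml file in the grouped layout that from_yaml() above expects, loads it, and round-trips the result through the standard transformers save/load machinery. The file names quantizer.yaml and quantizer_config_dir are illustrative, not from the repository.

# Usage sketch (illustrative; file names are made up for this example).
from quantizer_config import QuantizerConfig

# The layout below mirrors what from_yaml() reads: three grouped
# sections plus flat training keys (any omitted key keeps its default).
example_yaml = """\
f0_vq_params:
  l_bins: 320
  emb_width: 64
  mu: 0.99
  levels: 1
f0_encoder_params:
  input_emb_width: 3
  output_emb_width: 64
  levels: 1
  downs_t: [4]
  strides_t: [2]
  width: 32
  depth: 4
  m_conv: 1.0
  dilation_growth_rate: 3
f0_decoder_params:
  input_emb_width: 3
  output_emb_width: 64
  levels: 1
  downs_t: [4]
  strides_t: [2]
  width: 32
  depth: 4
  m_conv: 1.0
  dilation_growth_rate: 3
lambda_commit: 0.02
multispkr: single
"""

with open("quantizer.yaml", "w") as f:
    f.write(example_yaml)

config = QuantizerConfig.from_yaml("quantizer.yaml")
print(config.f0_vq_params)                  # {'l_bins': 320, 'emb_width': 64, ...}
print(config.f0_encoder_params["downs_t"])  # [4]

# Subclassing PretrainedConfig gives the save/load round trip for free.
config.save_pretrained("quantizer_config_dir")
reloaded = QuantizerConfig.from_pretrained("quantizer_config_dir")
assert reloaded.l_bins == config.l_bins

Grouping the yaml under f0_vq_params / f0_encoder_params / f0_decoder_params keeps the on-disk format aligned with the property accessors, so the same dicts can be passed straight through to the bottleneck, encoder, and decoder constructors.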