yangwang825 committed on
Commit
c1ae634
·
1 Parent(s): 7c2eaae

Upload config

Browse files
Files changed (2) hide show
  1. config.json +3 -4
  2. configuration_mert.py +125 -0
config.json CHANGED
@@ -3,14 +3,13 @@
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
- "MERTForSequenceClassification"
7
  ],
8
  "attention_dropout": 0.1,
9
  "attention_relax": -1.0,
10
  "auto_map": {
11
- "AutoConfig": "configuration_MERT.MERTConfig",
12
- "AutoModel": "modeling_MERT.MERTModel",
13
- "AutoModelForAudioClassification": "modeling_mert.MERTForSequenceClassification"
14
  },
15
  "bos_token_id": 1,
16
  "classifier_proj_size": 256,
 
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
+ "MERTModel"
7
  ],
8
  "attention_dropout": 0.1,
9
  "attention_relax": -1.0,
10
  "auto_map": {
11
+ "AutoConfig": "configuration_mert.MERTConfig",
12
+ "AutoModel": "modeling_MERT.MERTModel"
 
13
  },
14
  "bos_token_id": 1,
15
  "classifier_proj_size": 256,
configuration_mert.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import operator
3
+ from transformers.utils import logging
4
+ from transformers.configuration_utils import PretrainedConfig
5
+
6
+ logger = logging.get_logger(__name__)
7
+
8
+
9
class MERTConfig(PretrainedConfig):
    """Configuration container for a MERT model.

    The constructor stores every hyperparameter it receives as an instance
    attribute of the same name, forwards the special token ids together with
    any extra keyword arguments to ``PretrainedConfig.__init__``, and derives
    ``num_feat_extract_layers`` from the length of ``conv_dim``.

    ``conv_dim``, ``conv_stride`` and ``conv_kernel`` are converted to lists
    and must all have the same length.

    NOTE(review): the argument names look like the Wav2Vec2/HuBERT-style
    configuration arguments, extended with ``feature_extractor_cqt``,
    ``feature_extractor_cqt_bins``, ``deepnorm`` and ``attention_relax``;
    confirm their exact meaning against the modeling code.

    Raises:
        ValueError: If ``conv_stride`` or ``conv_kernel`` does not have the
            same number of entries as ``conv_dim``.
    """

    model_type = "mert_model"

    def __init__(
        self,
        vocab_size=32,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout=0.1,
        activation_dropout=0.1,
        attention_dropout=0.1,
        feat_proj_layer_norm=True,
        feat_proj_dropout=0.0,
        final_dropout=0.1,
        layerdrop=0.1,
        initializer_range=0.02,
        layer_norm_eps=1e-5,
        feat_extract_norm="group",
        feat_extract_activation="gelu",
        conv_dim=(512, 512, 512, 512, 512, 512, 512),
        conv_stride=(5, 2, 2, 2, 2, 2, 2),
        conv_kernel=(10, 3, 3, 3, 3, 2, 2),
        conv_bias=False,
        num_conv_pos_embeddings=128,
        num_conv_pos_embedding_groups=16,
        do_stable_layer_norm=False,
        apply_spec_augment=True,
        mask_time_prob=0.05,
        mask_time_length=10,
        mask_time_min_masks=2,
        mask_feature_prob=0.0,
        mask_feature_length=10,
        mask_feature_min_masks=0,
        ctc_loss_reduction="sum",
        ctc_zero_infinity=False,
        use_weighted_layer_sum=False,
        classifier_proj_size=256,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        feature_extractor_cqt=False,
        feature_extractor_cqt_bins=336,
        deepnorm=False,
        attention_relax=-1.0,
        **kwargs
    ):
        # The special token ids are handled by the base class, not stored here.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )

        self.hidden_size = hidden_size

        # Convolutional feature-extractor settings; the per-layer sequences
        # are copied into lists so tuples and lists are accepted alike.
        self.feat_extract_norm = feat_extract_norm
        self.feat_extract_activation = feat_extract_activation
        self.conv_dim = list(conv_dim)
        self.conv_stride = list(conv_stride)
        self.conv_kernel = list(conv_kernel)
        self.conv_bias = conv_bias
        self.num_conv_pos_embeddings = num_conv_pos_embeddings
        self.num_conv_pos_embedding_groups = num_conv_pos_embedding_groups
        self.num_feat_extract_layers = len(self.conv_dim)

        # Remaining architecture, dropout and initialisation settings.
        self.num_hidden_layers = num_hidden_layers
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.num_attention_heads = num_attention_heads
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.feat_proj_layer_norm = feat_proj_layer_norm
        self.feat_proj_dropout = feat_proj_dropout
        self.final_dropout = final_dropout
        self.layerdrop = layerdrop
        self.layer_norm_eps = layer_norm_eps
        self.initializer_range = initializer_range
        self.vocab_size = vocab_size
        self.do_stable_layer_norm = do_stable_layer_norm
        self.use_weighted_layer_sum = use_weighted_layer_sum
        self.classifier_proj_size = classifier_proj_size

        # `num_feat_extract_layers` is derived from `conv_dim`, so the
        # `conv_dim` comparison always holds; in effect this verifies that
        # `conv_stride` and `conv_kernel` have as many entries as `conv_dim`.
        layer_count = self.num_feat_extract_layers
        conv_lengths_agree = (
            len(self.conv_stride) == layer_count
            and len(self.conv_kernel) == layer_count
            and len(self.conv_dim) == layer_count
        )
        if not conv_lengths_agree:
            raise ValueError(
                "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` =="
                " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) ="
                f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`,"
                f" `len(config.conv_kernel) = {len(self.conv_kernel)}`."
            )

        # Fine-tuning parameters for SpecAugment: https://arxiv.org/abs/1904.08779
        self.apply_spec_augment = apply_spec_augment
        self.mask_time_prob = mask_time_prob
        self.mask_time_length = mask_time_length
        self.mask_time_min_masks = mask_time_min_masks
        self.mask_feature_prob = mask_feature_prob
        self.mask_feature_length = mask_feature_length
        self.mask_feature_min_masks = mask_feature_min_masks

        # CTC loss options.
        self.ctc_loss_reduction = ctc_loss_reduction
        self.ctc_zero_infinity = ctc_zero_infinity

        # CQT feature-extractor options.
        self.feature_extractor_cqt = feature_extractor_cqt
        self.feature_extractor_cqt_bins = feature_extractor_cqt_bins

        # deepnorm: up-scale weighted residual connection + down-scale
        # initial value of the transformer encoder.
        self.deepnorm = deepnorm

        self.attention_relax = attention_relax

    @property
    def inputs_to_logits_ratio(self):
        """Return the product of all entries of ``conv_stride`` (1 if empty)."""
        ratio = 1
        for stride in self.conv_stride:
            ratio = ratio * stride
        return ratio