Upload folder using huggingface_hub
Browse files- config.gin +142 -0
- model.ckpt +3 -0
config.gin
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Macros:
|
2 |
+
# ==============================================================================
|
3 |
+
new_freq = 24000
|
4 |
+
|
5 |
+
# Parameters for AudioDataModule:
|
6 |
+
# ==============================================================================
|
7 |
+
AudioDataModule.num_workers = 20
|
8 |
+
|
9 |
+
# Parameters for AudioDataset:
|
10 |
+
# ==============================================================================
|
11 |
+
AudioDataset.half_precision = True
|
12 |
+
AudioDataset.mono = True
|
13 |
+
AudioDataset.new_freq = %new_freq
|
14 |
+
AudioDataset.num_frames = 480000
|
15 |
+
AudioDataset.orig_freq = 16000
|
16 |
+
|
17 |
+
# Parameters for build_dev_datamodule:
|
18 |
+
# ==============================================================================
|
19 |
+
build_dev_datamodule.datamodule = @discotube
|
20 |
+
|
21 |
+
# Parameters for build_module:
|
22 |
+
# ==============================================================================
|
23 |
+
build_module.ckpt_path = 'model.ckpt'
|
24 |
+
build_module.module = @modules.maskingmodel.MaskingModel
|
25 |
+
build_module.net = @nets.conformer.Conformer
|
26 |
+
build_module.representation = \
|
27 |
+
[@nets.cqt.CQT,
|
28 |
+
@nets.encodec.EnCodec,
|
29 |
+
@nets.melspectrogram.MelSpectrogram,
|
30 |
+
@nets.waveform.Waveform]
|
31 |
+
|
32 |
+
# Parameters for Conformer:
|
33 |
+
# ==============================================================================
|
34 |
+
Conformer.alpha_deepnorm = 2.6321480259049848
|
35 |
+
Conformer.beta_deepnorm = 0.022386873579657126
|
36 |
+
Conformer.conv_kernel_size = 5
|
37 |
+
Conformer.depth = 24
|
38 |
+
Conformer.dropout = 0.2
|
39 |
+
Conformer.embed_dim = 1024
|
40 |
+
Conformer.input_dropout = 0.0
|
41 |
+
Conformer.mlp_ratio = 4.0
|
42 |
+
Conformer.mlp_residual_factor = 4.0
|
43 |
+
Conformer.num_heads = 8
|
44 |
+
Conformer.num_patches = None
|
45 |
+
Conformer.use_deepnorm = True
|
46 |
+
Conformer.use_rope = True
|
47 |
+
|
48 |
+
# Parameters for CosineAnnealingCallback:
|
49 |
+
# ==============================================================================
|
50 |
+
CosineAnnealingCallback.eta_min = 1e-07
|
51 |
+
CosineAnnealingCallback.warmup_steps = 30000
|
52 |
+
|
53 |
+
# Parameters for CQT:
|
54 |
+
# ==============================================================================
|
55 |
+
CQT.bins_per_octave = 24
|
56 |
+
CQT.f_min = 32.703
|
57 |
+
CQT.hop_len = 320
|
58 |
+
CQT.logC = True
|
59 |
+
CQT.magnitude = True
|
60 |
+
CQT.n_bins = 188
|
61 |
+
CQT.norm_mean = 4.754879065310596
|
62 |
+
CQT.norm_std = 1.9055732535255916
|
63 |
+
CQT.patch_size = (188, 3)
|
64 |
+
CQT.power = 2
|
65 |
+
CQT.sr = %new_freq
|
66 |
+
|
67 |
+
# Parameters for DiscotubeAudioDataModule:
|
68 |
+
# ==============================================================================
|
69 |
+
DiscotubeAudioDataModule.batch_size = 20
|
70 |
+
DiscotubeAudioDataModule.data_dir = ''
|
71 |
+
DiscotubeAudioDataModule.filelist_train = ''
|
72 |
+
DiscotubeAudioDataModule.filelist_val = ''
|
73 |
+
|
74 |
+
# Parameters for EnCodec:
|
75 |
+
# ==============================================================================
|
76 |
+
EnCodec.norm_type = 'global'
|
77 |
+
EnCodec.orig_sr = %new_freq
|
78 |
+
EnCodec.patch_size = (128, 3)
|
79 |
+
EnCodec.stats_path = None
|
80 |
+
EnCodec.weights_path = 'facebook/encodec_24khz'
|
81 |
+
|
82 |
+
# Parameters for FiniteScalarQuantizer:
|
83 |
+
# ==============================================================================
|
84 |
+
FiniteScalarQuantizer.levels = [6, 6, 6, 6, 6]
|
85 |
+
FiniteScalarQuantizer.preserve_symmetry = True
|
86 |
+
|
87 |
+
# Parameters for MaskingModel:
|
88 |
+
# ==============================================================================
|
89 |
+
MaskingModel.codebook_dim = 1
|
90 |
+
MaskingModel.codebook_size = 7776
|
91 |
+
MaskingModel.diff_input = False
|
92 |
+
MaskingModel.input_representation = @nets.waveform.Waveform
|
93 |
+
MaskingModel.lr = 0.0001
|
94 |
+
MaskingModel.mask_prob = 0.6
|
95 |
+
MaskingModel.mask_seconds = 0.4
|
96 |
+
MaskingModel.num_codebooks = 1
|
97 |
+
MaskingModel.plot_tokens = False
|
98 |
+
MaskingModel.quantizer_type = 'finite_scalar_quantizer'
|
99 |
+
MaskingModel.seed = 0
|
100 |
+
MaskingModel.weight_decay = 0.01
|
101 |
+
|
102 |
+
# Parameters for MelSpectrogram:
|
103 |
+
# ==============================================================================
|
104 |
+
MelSpectrogram.freq_mask_param = 0
|
105 |
+
MelSpectrogram.hop_len = 320
|
106 |
+
MelSpectrogram.mel_scale = 'slaney'
|
107 |
+
MelSpectrogram.n_mel = 96
|
108 |
+
MelSpectrogram.norm = 'slaney'
|
109 |
+
MelSpectrogram.norm_mean = 2.06755686098554
|
110 |
+
MelSpectrogram.norm_std = 1.268292820667291
|
111 |
+
MelSpectrogram.patch_size = (96, 3)
|
112 |
+
MelSpectrogram.power = 2
|
113 |
+
MelSpectrogram.sr = %new_freq
|
114 |
+
MelSpectrogram.stretch_factor = 1
|
115 |
+
MelSpectrogram.time_mask_param = 0
|
116 |
+
MelSpectrogram.win_len = 512
|
117 |
+
|
118 |
+
# Parameters for train:
|
119 |
+
# ==============================================================================
|
120 |
+
train.params = \
|
121 |
+
{'accelerator': 'gpu',
|
122 |
+
'devices': 4,
|
123 |
+
'log_every_n_steps': 50,
|
124 |
+
'max_steps': 400000,
|
125 |
+
'num_nodes': 2,
|
126 |
+
'num_sanity_val_steps': 0,
|
127 |
+
'precision': 'bf16-mixed',
|
128 |
+
'strategy': 'ddp_find_unused_parameters_true'}
|
129 |
+
train.wandb_params = \
|
130 |
+
{'entity': 'mtg-upf',
|
131 |
+
'group': 'masking_conformer',
|
132 |
+
'name': 'mask_conf_large_au_to_all_25hz_fsq',
|
133 |
+
'offline': True,
|
134 |
+
'project': 'mtg-ssl',
|
135 |
+
'save_dir': '/gpfs/projects/upf97/logs/'}
|
136 |
+
|
137 |
+
# Parameters for Waveform:
|
138 |
+
# ==============================================================================
|
139 |
+
Waveform.norm_mean = None
|
140 |
+
Waveform.norm_std = None
|
141 |
+
Waveform.patch_size = (1, 960)
|
142 |
+
Waveform.sr = %new_freq
|
model.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bf2a7e0ff047a60de8d68e8caa77941ae31e71df5034402e007b34a485e726b
|
3 |
+
size 2325292802
|