p-alonso committed on
Commit
efb3216
·
verified ·
1 Parent(s): 0aa15f2

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.gin +142 -0
  2. model.ckpt +3 -0
config.gin ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Macros:
2
+ # ==============================================================================
3
+ new_freq = 24000
4
+
5
+ # Parameters for AudioDataModule:
6
+ # ==============================================================================
7
+ AudioDataModule.num_workers = 20
8
+
9
+ # Parameters for AudioDataset:
10
+ # ==============================================================================
11
+ AudioDataset.half_precision = True
12
+ AudioDataset.mono = True
13
+ AudioDataset.new_freq = %new_freq
14
+ AudioDataset.num_frames = 480000
15
+ AudioDataset.orig_freq = 16000
16
+
17
+ # Parameters for build_dev_datamodule:
18
+ # ==============================================================================
19
+ build_dev_datamodule.datamodule = @discotube
20
+
21
+ # Parameters for build_module:
22
+ # ==============================================================================
23
+ build_module.ckpt_path = 'model.ckpt'
24
+ build_module.module = @modules.maskingmodel.MaskingModel
25
+ build_module.net = @nets.conformer.Conformer
26
+ build_module.representation = \
27
+ [@nets.cqt.CQT,
28
+ @nets.encodec.EnCodec,
29
+ @nets.melspectrogram.MelSpectrogram,
30
+ @nets.waveform.Waveform]
31
+
32
+ # Parameters for Conformer:
33
+ # ==============================================================================
34
+ Conformer.alpha_deepnorm = 2.6321480259049848
35
+ Conformer.beta_deepnorm = 0.022386873579657126
36
+ Conformer.conv_kernel_size = 5
37
+ Conformer.depth = 24
38
+ Conformer.dropout = 0.2
39
+ Conformer.embed_dim = 1024
40
+ Conformer.input_dropout = 0.0
41
+ Conformer.mlp_ratio = 4.0
42
+ Conformer.mlp_residual_factor = 4.0
43
+ Conformer.num_heads = 8
44
+ Conformer.num_patches = None
45
+ Conformer.use_deepnorm = True
46
+ Conformer.use_rope = True
47
+
48
+ # Parameters for CosineAnnealingCallback:
49
+ # ==============================================================================
50
+ CosineAnnealingCallback.eta_min = 1e-07
51
+ CosineAnnealingCallback.warmup_steps = 30000
52
+
53
+ # Parameters for CQT:
54
+ # ==============================================================================
55
+ CQT.bins_per_octave = 24
56
+ CQT.f_min = 32.703
57
+ CQT.hop_len = 320
58
+ CQT.logC = True
59
+ CQT.magnitude = True
60
+ CQT.n_bins = 188
61
+ CQT.norm_mean = 4.754879065310596
62
+ CQT.norm_std = 1.9055732535255916
63
+ CQT.patch_size = (188, 3)
64
+ CQT.power = 2
65
+ CQT.sr = %new_freq
66
+
67
+ # Parameters for DiscotubeAudioDataModule:
68
+ # ==============================================================================
69
+ DiscotubeAudioDataModule.batch_size = 20
70
+ DiscotubeAudioDataModule.data_dir = ''
71
+ DiscotubeAudioDataModule.filelist_train = ''
72
+ DiscotubeAudioDataModule.filelist_val = ''
73
+
74
+ # Parameters for EnCodec:
75
+ # ==============================================================================
76
+ EnCodec.norm_type = 'global'
77
+ EnCodec.orig_sr = %new_freq
78
+ EnCodec.patch_size = (128, 3)
79
+ EnCodec.stats_path = None
80
+ EnCodec.weights_path = 'facebook/encodec_24khz'
81
+
82
+ # Parameters for FiniteScalarQuantizer:
83
+ # ==============================================================================
84
+ FiniteScalarQuantizer.levels = [6, 6, 6, 6, 6]
85
+ FiniteScalarQuantizer.preserve_symmetry = True
86
+
87
+ # Parameters for MaskingModel:
88
+ # ==============================================================================
89
+ MaskingModel.codebook_dim = 1
90
+ MaskingModel.codebook_size = 7776
91
+ MaskingModel.diff_input = False
92
+ MaskingModel.input_representation = @nets.waveform.Waveform
93
+ MaskingModel.lr = 0.0001
94
+ MaskingModel.mask_prob = 0.6
95
+ MaskingModel.mask_seconds = 0.4
96
+ MaskingModel.num_codebooks = 1
97
+ MaskingModel.plot_tokens = False
98
+ MaskingModel.quantizer_type = 'finite_scalar_quantizer'
99
+ MaskingModel.seed = 0
100
+ MaskingModel.weight_decay = 0.01
101
+
102
+ # Parameters for MelSpectrogram:
103
+ # ==============================================================================
104
+ MelSpectrogram.freq_mask_param = 0
105
+ MelSpectrogram.hop_len = 320
106
+ MelSpectrogram.mel_scale = 'slaney'
107
+ MelSpectrogram.n_mel = 96
108
+ MelSpectrogram.norm = 'slaney'
109
+ MelSpectrogram.norm_mean = 2.06755686098554
110
+ MelSpectrogram.norm_std = 1.268292820667291
111
+ MelSpectrogram.patch_size = (96, 3)
112
+ MelSpectrogram.power = 2
113
+ MelSpectrogram.sr = %new_freq
114
+ MelSpectrogram.stretch_factor = 1
115
+ MelSpectrogram.time_mask_param = 0
116
+ MelSpectrogram.win_len = 512
117
+
118
+ # Parameters for train:
119
+ # ==============================================================================
120
+ train.params = \
121
+ {'accelerator': 'gpu',
122
+ 'devices': 4,
123
+ 'log_every_n_steps': 50,
124
+ 'max_steps': 400000,
125
+ 'num_nodes': 2,
126
+ 'num_sanity_val_steps': 0,
127
+ 'precision': 'bf16-mixed',
128
+ 'strategy': 'ddp_find_unused_parameters_true'}
129
+ train.wandb_params = \
130
+ {'entity': 'mtg-upf',
131
+ 'group': 'masking_conformer',
132
+ 'name': 'mask_conf_large_au_to_all_25hz_fsq',
133
+ 'offline': True,
134
+ 'project': 'mtg-ssl',
135
+ 'save_dir': '/gpfs/projects/upf97/logs/'}
136
+
137
+ # Parameters for Waveform:
138
+ # ==============================================================================
139
+ Waveform.norm_mean = None
140
+ Waveform.norm_std = None
141
+ Waveform.patch_size = (1, 960)
142
+ Waveform.sr = %new_freq
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bf2a7e0ff047a60de8d68e8caa77941ae31e71df5034402e007b34a485e726b
3
+ size 2325292802