# File size: 4,827 Bytes
# efb3216
# Macros:
# ==============================================================================
# Shared macro. It is referenced further down as %new_freq by
# AudioDataset.new_freq, CQT.sr, EnCodec.orig_sr, MelSpectrogram.sr and
# Waveform.sr, so changing it here changes all five bindings at once.
# Presumably a sample rate in Hz -- TODO confirm against AudioDataset.
new_freq = 24000
# Parameters for AudioDataModule:
# ==============================================================================
# Presumably the number of data-loading worker processes -- verify against the
# AudioDataModule constructor. The value is machine-dependent.
AudioDataModule.num_workers = 20
# Parameters for AudioDataset:
# ==============================================================================
# new_freq follows the %new_freq macro (24000); orig_freq is fixed at 16000.
# Presumably audio is resampled from orig_freq to new_freq -- TODO confirm.
# NOTE(review): num_frames = 480000 corresponds to 20 s at 24000 or 30 s at
# 16000, if both values are sample rates in Hz; confirm which rate it is
# counted at.
AudioDataset.half_precision = True
AudioDataset.mono = True
AudioDataset.new_freq = %new_freq
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000
# Parameters for build_dev_datamodule:
# ==============================================================================
# @discotube is a Gin reference to a configurable registered under the name
# "discotube"; without a trailing "()" the configurable itself is passed
# rather than the result of calling it.
# NOTE(review): this file binds parameters for DiscotubeAudioDataModule, so
# "discotube" is presumably that class registered under a short name -- verify.
build_dev_datamodule.datamodule = @discotube
# Parameters for build_module:
# ==============================================================================
# Selects the module class (MaskingModel), the network (Conformer) and a list
# of four representation configurables. All are passed as uncalled references.
# The Waveform entry in the list is also bound to
# MaskingModel.input_representation elsewhere in this file.
# NOTE(review): 'model.ckpt' is a relative path; what it is resolved against
# is not visible here.
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = \
[@nets.cqt.CQT,
@nets.encodec.EnCodec,
@nets.melspectrogram.MelSpectrogram,
@nets.waveform.Waveform]
# Parameters for Conformer:
# ==============================================================================
# alpha_deepnorm equals (2 * depth) ** 0.25 for depth = 24.
# NOTE(review): beta_deepnorm is not (8 * depth) ** -0.25 (about 0.2686), which
# would be the matching encoder-only DeepNorm constant -- confirm how this
# value was derived before changing depth.
# embed_dim / num_heads = 128, presumably the per-head width -- TODO confirm.
# num_patches = None presumably leaves the sequence length unset -- verify
# against the Conformer constructor.
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = None
Conformer.use_deepnorm = True
Conformer.use_rope = True
# Parameters for CosineAnnealingCallback:
# ==============================================================================
# Presumably eta_min is the floor of the learning-rate schedule and
# warmup_steps the length of the warm-up phase -- verify against the callback.
# For scale: train.params sets max_steps = 400000 and MaskingModel.lr = 0.0001.
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000
# Parameters for CQT:
# ==============================================================================
# sr follows the %new_freq macro (24000).
# n_bins / bins_per_octave = 188 / 24, i.e. just under 8 octaves above f_min.
# patch_size[0] matches n_bins, so a patch presumably spans the full frequency
# axis -- TODO confirm the (frequency, time) ordering.
# hop_len = 320 at 24000 gives 75 frames per second; with patch_size[1] = 3
# that is 25 patches per second, the same rate implied by
# MelSpectrogram (hop 320, 3 frames) and Waveform (960 samples).
# norm_mean / norm_std are presumably precomputed dataset statistics -- verify.
CQT.bins_per_octave = 24
CQT.f_min = 32.703
CQT.hop_len = 320
CQT.logC = True
CQT.magnitude = True
CQT.n_bins = 188
CQT.norm_mean = 4.754879065310596
CQT.norm_std = 1.9055732535255916
CQT.patch_size = (188, 3)
CQT.power = 2
CQT.sr = %new_freq
# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
# NOTE(review): data_dir, filelist_train and filelist_val are empty strings in
# this file; presumably they must be overridden with real paths before
# training -- confirm.
# batch_size is presumably per process/device rather than global -- verify.
DiscotubeAudioDataModule.batch_size = 20
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''
# Parameters for EnCodec:
# ==============================================================================
# orig_sr follows the %new_freq macro (24000), which agrees with the "24khz"
# suffix of weights_path.
# weights_path looks like a Hugging Face Hub model id -- confirm how the
# EnCodec wrapper loads it.
# NOTE(review): norm_type is 'global' while stats_path is None; confirm where
# the normalization statistics come from in that case.
# patch_size[1] = 3 matches the 3-frame patches of CQT and MelSpectrogram.
EnCodec.norm_type = 'global'
EnCodec.orig_sr = %new_freq
EnCodec.patch_size = (128, 3)
EnCodec.stats_path = None
EnCodec.weights_path = 'facebook/encodec_24khz'
# Parameters for FiniteScalarQuantizer:
# ==============================================================================
# Five dimensions with 6 levels each: 6 ** 5 = 7776, which equals
# MaskingModel.codebook_size. Keep the two in sync when editing levels.
FiniteScalarQuantizer.levels = [6, 6, 6, 6, 6]
FiniteScalarQuantizer.preserve_symmetry = True
# Parameters for MaskingModel:
# ==============================================================================
# codebook_size = 7776 equals the product of FiniteScalarQuantizer.levels
# (6 ** 5), consistent with quantizer_type = 'finite_scalar_quantizer'.
# input_representation is Waveform, one of the four entries in
# build_module.representation; presumably the model reads waveform patches and
# predicts targets from the listed representations -- TODO confirm.
# mask_seconds = 0.4 would cover 10 patches at the 25 patches-per-second rate
# implied by the patch sizes in this file -- verify how masks are sized.
MaskingModel.codebook_dim = 1
MaskingModel.codebook_size = 7776
MaskingModel.diff_input = False
MaskingModel.input_representation = @nets.waveform.Waveform
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 1
MaskingModel.plot_tokens = False
MaskingModel.quantizer_type = 'finite_scalar_quantizer'
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01
# Parameters for MelSpectrogram:
# ==============================================================================
# sr follows the %new_freq macro (24000).
# hop_len = 320 at 24000 gives 75 frames per second; with patch_size[1] = 3
# that is 25 patches per second. patch_size[0] matches n_mel (96).
# freq_mask_param = 0, time_mask_param = 0 and stretch_factor = 1 presumably
# disable masking/stretch augmentation -- verify against MelSpectrogram.
# norm_mean / norm_std are presumably precomputed dataset statistics -- verify.
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 320
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.patch_size = (96, 3)
MelSpectrogram.power = 2
MelSpectrogram.sr = %new_freq
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
# Parameters for train:
# ==============================================================================
# The keys in train.params look like PyTorch Lightning Trainer arguments --
# confirm how train() consumes the dict.
# devices = 4 with num_nodes = 2 presumably means 8 GPUs in total.
train.params = \
{'accelerator': 'gpu',
'devices': 4,
'log_every_n_steps': 50,
'max_steps': 400000,
'num_nodes': 2,
'num_sanity_val_steps': 0,
'precision': 'bf16-mixed',
'strategy': 'ddp_find_unused_parameters_true'}
# Weights & Biases logger settings.
# NOTE(review): save_dir is a cluster-specific absolute path and 'offline' is
# True; both presumably need changing to run on another machine.
train.wandb_params = \
{'entity': 'mtg-upf',
'group': 'masking_conformer',
'name': 'mask_conf_large_au_to_all_25hz_fsq',
'offline': True,
'project': 'mtg-ssl',
'save_dir': '/gpfs/projects/upf97/logs/'}
# Parameters for Waveform:
# ==============================================================================
# sr follows the %new_freq macro (24000).
# patch_size = (1, 960): 960 samples at 24000 is 40 ms, i.e. 25 patches per
# second, matching the "25hz" tag in train.wandb_params['name'].
# norm_mean / norm_std = None presumably means no normalization is applied --
# verify against Waveform.
Waveform.norm_mean = None
Waveform.norm_std = None
Waveform.patch_size = (1, 960)
Waveform.sr = %new_freq
|