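# Hugging Face Hub page header (not part of the gin configuration):
#   avatar text:    p-alonso's picture
#   commit message: Upload folder using huggingface_hub
#   commit:         efb3216 (verified)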
# Macros:
# ==============================================================================
# Shared sample-rate macro. In this file it is referenced as %new_freq by
# AudioDataset.new_freq, CQT.sr, EnCodec.orig_sr, MelSpectrogram.sr and
# Waveform.sr, so changing it here changes all of them at once.
# NOTE(review): units are presumably Hz - confirm.
new_freq = 24000
# Parameters for AudioDataModule:
# ==============================================================================
# NOTE(review): presumably the number of data-loading worker processes; the
# value is machine dependent - confirm it suits the target host.
AudioDataModule.num_workers = 20
# Parameters for AudioDataset:
# ==============================================================================
# new_freq follows the %new_freq macro (24000), while orig_freq is a literal
# 16000.
# NOTE(review): the two rates differ, so the dataset presumably resamples from
# orig_freq to new_freq - confirm against the AudioDataset implementation.
# NOTE(review): if num_frames counts samples, 480000 is 20 s at 24000 and 30 s
# at 16000; which rate applies is not visible in this file - confirm.
AudioDataset.half_precision = True
AudioDataset.mono = True
AudioDataset.new_freq = %new_freq
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000
# Parameters for build_dev_datamodule:
# ==============================================================================
# Binds the datamodule argument to the configurable referenced as @discotube.
# NOTE(review): presumably this resolves to DiscotubeAudioDataModule, which is
# configured further down in this file - confirm the registration name.
build_dev_datamodule.datamodule = @discotube
# Parameters for build_module:
# ==============================================================================
# module and net are chosen by gin reference (MaskingModel and Conformer).
# representation is a list of four configurables: CQT, EnCodec, MelSpectrogram
# and Waveform, each parameterized in its own section of this file.
# NOTE(review): MaskingModel.input_representation in this file is Waveform, so
# this list presumably names the prediction targets - confirm.
# NOTE(review): ckpt_path is a relative path; what it is resolved against is
# not visible here - confirm.
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = \
[@nets.cqt.CQT,
@nets.encodec.EnCodec,
@nets.melspectrogram.MelSpectrogram,
@nets.waveform.Waveform]
# Parameters for Conformer:
# ==============================================================================
# Arithmetic relations between the values below:
#   - alpha_deepnorm equals (2 * depth) ** 0.25 for depth = 24.
#   - embed_dim / num_heads = 1024 / 8 = 128.
# NOTE(review): alpha_deepnorm is stored as a literal, so it presumably has to
# be recomputed by hand if depth changes - confirm. beta_deepnorm does not
# follow the same depth formula; its origin is not visible in this file.
# NOTE(review): num_patches is None; how Conformer handles None is not visible
# here - confirm.
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = None
Conformer.use_deepnorm = True
Conformer.use_rope = True
# Parameters for CosineAnnealingCallback:
# ==============================================================================
# For scale: train.params in this file sets 'max_steps' to 400000, so
# warmup_steps = 30000 is 7.5% of the run, and MaskingModel.lr is 0.0001.
# NOTE(review): eta_min is presumably the learning-rate floor of the cosine
# schedule - confirm against the callback implementation.
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000
# Parameters for CQT:
# ==============================================================================
# sr follows the %new_freq macro (24000). The first entry of patch_size equals
# n_bins (188). hop_len (320) equals MelSpectrogram.hop_len in this file.
# NOTE(review): if hop_len is in samples, 24000 / 320 = 75 frames per second
# and a 3-frame patch gives 25 patches per second - confirm.
# NOTE(review): n_bins / bins_per_octave = 188 / 24, i.e. about 7.83 octaves,
# presumably counted upwards from f_min - confirm.
# NOTE(review): norm_mean / norm_std are presumably precomputed dataset
# statistics tied to the other settings in this section - confirm before
# changing any of them.
CQT.bins_per_octave = 24
CQT.f_min = 32.703
CQT.hop_len = 320
CQT.logC = True
CQT.magnitude = True
CQT.n_bins = 188
CQT.norm_mean = 4.754879065310596
CQT.norm_std = 1.9055732535255916
CQT.patch_size = (188, 3)
CQT.power = 2
CQT.sr = %new_freq
# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
# data_dir and both file lists are empty strings in this file.
# NOTE(review): presumably they are placeholders that must be set to real
# paths before the datamodule can load data - confirm.
DiscotubeAudioDataModule.batch_size = 20
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''
# Parameters for EnCodec:
# ==============================================================================
# orig_sr follows the %new_freq macro (24000), which agrees with the "24khz"
# in weights_path.
# NOTE(review): weights_path looks like a Hugging Face Hub model id rather
# than a local file - confirm how it is loaded.
# NOTE(review): stats_path is None while norm_type is 'global'; where the
# normalization statistics come from is not visible here - confirm.
EnCodec.norm_type = 'global'
EnCodec.orig_sr = %new_freq
EnCodec.patch_size = (128, 3)
EnCodec.stats_path = None
EnCodec.weights_path = 'facebook/encodec_24khz'
# Parameters for FiniteScalarQuantizer:
# ==============================================================================
# Five dimensions with 6 levels each: 6 ** 5 = 7776, which equals
# MaskingModel.codebook_size in this file.
# NOTE(review): presumably levels and codebook_size must be kept in sync when
# either is changed - confirm.
FiniteScalarQuantizer.levels = [6, 6, 6, 6, 6]
FiniteScalarQuantizer.preserve_symmetry = True
# Parameters for MaskingModel:
# ==============================================================================
# quantizer_type selects 'finite_scalar_quantizer'; codebook_size = 7776
# equals 6 ** 5, the product of FiniteScalarQuantizer.levels in this file.
# input_representation is the Waveform configurable, which is also one of the
# entries of build_module.representation.
# NOTE(review): mask_seconds is presumably the mask span length in seconds and
# mask_prob the masked fraction - confirm against MaskingModel.
MaskingModel.codebook_dim = 1
MaskingModel.codebook_size = 7776
MaskingModel.diff_input = False
MaskingModel.input_representation = @nets.waveform.Waveform
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 1
MaskingModel.plot_tokens = False
MaskingModel.quantizer_type = 'finite_scalar_quantizer'
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01
# Parameters for MelSpectrogram:
# ==============================================================================
# sr follows the %new_freq macro (24000). The first entry of patch_size equals
# n_mel (96). hop_len (320) equals CQT.hop_len in this file.
# NOTE(review): if hop_len is in samples, 24000 / 320 = 75 frames per second
# and a 3-frame patch gives 25 patches per second - confirm.
# NOTE(review): freq_mask_param = 0, time_mask_param = 0 and
# stretch_factor = 1 presumably leave augmentation disabled - confirm.
# NOTE(review): norm_mean / norm_std are presumably precomputed dataset
# statistics tied to the other settings in this section - confirm.
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 320
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.patch_size = (96, 3)
MelSpectrogram.power = 2
MelSpectrogram.sr = %new_freq
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
# Parameters for train:
# ==============================================================================
# train.params: 'devices' is 4 and 'num_nodes' is 2.
# NOTE(review): the keys look like PyTorch Lightning Trainer arguments, which
# would make this 8 GPUs in total - confirm how train() consumes the dict.
train.params = \
{'accelerator': 'gpu',
'devices': 4,
'log_every_n_steps': 50,
'max_steps': 400000,
'num_nodes': 2,
'num_sanity_val_steps': 0,
'precision': 'bf16-mixed',
'strategy': 'ddp_find_unused_parameters_true'}
# train.wandb_params: 'offline' is True and 'save_dir' is an absolute path.
# NOTE(review): the save_dir path looks specific to one cluster and presumably
# needs to be changed when running elsewhere - confirm.
train.wandb_params = \
{'entity': 'mtg-upf',
'group': 'masking_conformer',
'name': 'mask_conf_large_au_to_all_25hz_fsq',
'offline': True,
'project': 'mtg-ssl',
'save_dir': '/gpfs/projects/upf97/logs/'}
# Parameters for Waveform:
# ==============================================================================
# sr follows the %new_freq macro (24000); norm_mean and norm_std are None.
# NOTE(review): if the second entry of patch_size counts samples,
# 960 / 24000 = 0.04 s per patch, i.e. 25 patches per second - confirm.
Waveform.norm_mean = None
Waveform.norm_std = None
Waveform.patch_size = (1, 960)
Waveform.sr = %new_freq