# nemo-speaker-count-speakernet / model_config.yaml
# Uploaded by huseinzol05 with huggingface_hub (commit 35de8b9, 2.09 kB).
# Decoder section. `_target_` is a dotted class path (presumably resolved
# Hydra-style by the loader; confirm against the consuming code).
decoder:
  _target_: nemo.collections.asr.modules.SpeakerDecoder
  angular: false
  emb_sizes: 256
  # Same value as `filters` of the last `encoder.jasper` block (1500).
  feat_in: 1500
  num_classes: 7
  pool_mode: xvector
# Encoder section. `_target_` is a dotted class path (presumably resolved
# Hydra-style by the loader; confirm against the consuming code).
encoder:
  _target_: nemo.collections.asr.modules.ConvASREncoder
  activation: relu
  conv_mask: true
  # Same value as `preprocessor.features` (64).
  feat_in: 64
  # Five blocks: kernel widths 3, 7, 11, 15 with 512 filters each, then a
  # width-1 block with 1500 filters, no dropout and no residual.
  jasper:
    - dilation: [1]
      dropout: 0.5
      filters: 512
      kernel: [3]
      repeat: 1
      residual: true
      separable: true
      stride: [1]
    - dilation: [1]
      dropout: 0.5
      filters: 512
      kernel: [7]
      repeat: 2
      residual: true
      separable: true
      stride: [1]
    - dilation: [1]
      dropout: 0.5
      filters: 512
      kernel: [11]
      repeat: 2
      residual: true
      separable: true
      stride: [1]
    - dilation: [1]
      dropout: 0.5
      filters: 512
      kernel: [15]
      repeat: 2
      residual: true
      separable: true
      stride: [1]
    - dilation: [1]
      dropout: 0.0
      filters: 1500
      kernel: [1]
      repeat: 1
      residual: false
      separable: true
      stride: [1]
# Loss hyperparameters.
loss:
  margin: 0.2
  scale: 30
# Optimizer settings, with the learning-rate schedule nested under `sched`.
optim:
  lr: 0.006
  momentum: 0.9
  name: sgd
  sched:
    min_lr: 0.0001
    name: CosineAnnealing
    warmup_ratio: 0.1
  # NOTE(review): placed under `optim` (an optimizer argument) rather than
  # under `sched`; sorted key order allows either, so confirm with the
  # upstream config.
  weight_decay: 0.001
# Audio front-end section. `_target_` is a dotted class path (presumably
# resolved Hydra-style by the loader; confirm against the consuming code).
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  dither: 1.0e-05
  # Same value as `encoder.feat_in` (64).
  features: 64
  frame_splicing: 1
  n_fft: 512
  normalize: per_feature
  # Same value as `sample_rate` in `train_ds` and `validation_ds` (16000).
  sample_rate: 16000
  stft_conv: false
  window: hann
  # NOTE(review): presumably seconds (20 ms window, 10 ms hop); confirm.
  window_size: 0.02
  window_stride: 0.01
# Top-level dotted class path; presumably the model class this config
# belongs to (note: plain `target`, not `_target_`). Confirm with the loader.
target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
# Training dataset settings.
# NOTE(review): both manifest paths are absolute paths under /ws/manifests
# and will need to be overridden on any other machine.
train_ds:
  augmentor:
    noise:
      manifest_path: /ws/manifests/raid/musan/musan_music_noise_manifest_dur8.json
      max_snr_db: 15
      min_snr_db: 5
      prob: 0.2
  batch_size: 64
  labels: null
  manifest_filepath: /ws/manifests/raid/combined/train_manifest.json
  num_workers: 4
  sample_rate: 16000
  shuffle: true
  time_length: 8
# Validation dataset settings; same batch size, sample rate and time_length
# as `train_ds`, with shuffling off.
# NOTE(review): the manifest path is an absolute path under /ws/manifests
# and will need to be overridden on any other machine.
validation_ds:
  batch_size: 64
  labels: null
  manifest_filepath: /ws/manifests/raid/voxceleb/small_manifest.json
  num_workers: 1
  sample_rate: 16000
  shuffle: false
  time_length: 8