File size: 1,635 Bytes
45e1a77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
{
"do_normalize": true,
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
"feature_size": 1,
"padding_side": "right",
"padding_value": 0.0,
"return_attention_mask": true,
"sampling_rate": 16000,
"preprocessing": {
"audio_normalization": {
"method": "peak",
"target_level": -23.0,
"headroom_db": 3.0
},
"spectral_features": {
"mel_filters": 128,
"window_size_ms": 25,
"stride_ms": 10,
"fmin": 50,
"fmax": 8000,
"htk_compat": true
},
"augmentation": {
"time_masking": {
"enabled": true,
"time_mask_param": 100,
"num_masks": 2
},
"freq_masking": {
"enabled": true,
"freq_mask_param": 27,
"num_masks": 2
},
"noise": {
"enabled": true,
"noise_types": ["gaussian", "pink"],
"snr_range": [5, 20]
}
},
"signal_enhancement": {
"vad": {
"enabled": true,
"threshold": 0.5,
"min_speech_duration_ms": 250
},
"noise_reduction": {
"enabled": true,
"method": "spectral_gating",
"stationary_threshold": 1.5
}
}
},
"advanced_settings": {
"feature_extraction": {
"normalize_means": true,
"normalize_vars": true,
"deltas_order": 2,
"cmvn_window": 300
},
"resampling": {
"method": "kaiser_best",
"lowpass_filter_width": 64,
"rolloff": 0.945,
"beta": 14.0
},
"performance": {
"num_workers": 4,
"pin_memory": true,
"prefetch_factor": 2,
"persistent_workers": true
}
}
}
|