{
  "architectures": [
    "ToneForCTC"
  ],
  "ctc_loss_reduction": "mean",
  "ctc_zero_infinity": true,
  "decoder_params": {
    "feat_in": 384,
    "vocabulary": [
      "а",
      "б",
      "в",
      "г",
      "д",
      "е",
      "ё",
      "ж",
      "з",
      "и",
      "й",
      "к",
      "л",
      "м",
      "н",
      "о",
      "п",
      "р",
      "с",
      "т",
      "у",
      "ф",
      "х",
      "ц",
      "ч",
      "ш",
      "щ",
      "ъ",
      "ы",
      "ь",
      "э",
      "ю",
      "я",
      " "
    ]
  },
  "encoder_params": {
    "chunk_size": 10,
    "conv_kernel_size": 31,
    "d_model": 384,
    "dropout": 0.1,
    "dropout_att": 0.1,
    "feat_in": 64,
    "ff_expansion_factor": 4,
    "mhsa_state_size": 30,
    "mhsa_stateless_layers": 14,
    "n_heads": 8,
    "n_layers": 16,
    "reduction_factor": 2,
    "reduction_kernel_size": 3,
    "reduction_position": 6,
    "rope_dim": 32,
    "should_recompute_att_scores": [
      true,
      false,
      false,
      false,
      false,
      false,
      false,
      true,
      false,
      false,
      false,
      false,
      false,
      false,
      true,
      true
    ],
    "subsampling_conv_channels": [
      32,
      64
    ],
    "subsampling_kernel_size": [
      [
        11,
        21
      ],
      [
        11,
        11
      ]
    ],
    "subsampling_strides": [
      [
        1,
        1
      ],
      [
        3,
        1
      ]
    ],
    "upsample_position": 14
  },
  "feature_extraction_params": {
    "n_fft": 160,
    "n_mels": 64,
    "preemphasis_coefficient": 0.97,
    "sample_rate": 8000,
    "window_size": 0.02,
    "window_stride": 0.01
  },
  "pad_token_id": 34,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2"
}