File size: 1,821 Bytes
e23fc14 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
{
"architectures": [
"ToneForCTC"
],
"ctc_loss_reduction": "mean",
"ctc_zero_infinity": true,
"decoder_params": {
"feat_in": 384,
"vocabulary": [
"а",
"б",
"в",
"г",
"д",
"е",
"ё",
"ж",
"з",
"и",
"й",
"к",
"л",
"м",
"н",
"о",
"п",
"р",
"с",
"т",
"у",
"ф",
"х",
"ц",
"ч",
"ш",
"щ",
"ъ",
"ы",
"ь",
"э",
"ю",
"я",
" "
]
},
"encoder_params": {
"chunk_size": 10,
"conv_kernel_size": 31,
"d_model": 384,
"dropout": 0.1,
"dropout_att": 0.1,
"feat_in": 64,
"ff_expansion_factor": 4,
"mhsa_state_size": 30,
"mhsa_stateless_layers": 14,
"n_heads": 8,
"n_layers": 16,
"reduction_factor": 2,
"reduction_kernel_size": 3,
"reduction_position": 6,
"rope_dim": 32,
"should_recompute_att_scores": [
true,
false,
false,
false,
false,
false,
false,
true,
false,
false,
false,
false,
false,
false,
true,
true
],
"subsampling_conv_channels": [
32,
64
],
"subsampling_kernel_size": [
[
11,
21
],
[
11,
11
]
],
"subsampling_strides": [
[
1,
1
],
[
3,
1
]
],
"upsample_position": 14
},
"feature_extraction_params": {
"n_fft": 160,
"n_mels": 64,
"preemphasis_coefficient": 0.97,
"sample_rate": 8000,
"window_size": 0.02,
"window_stride": 0.01
},
"pad_token_id": 34,
"torch_dtype": "float32",
"transformers_version": "4.41.2"
}
|