t-tech
/

T-one

Automatic Speech Recognition

Model card Files Files and versions Community

T-one / config.json

sxdxfan's picture

Add model card, acoustic model checkpoint in safetensors format, ONNX model, tokenizer configs and KenLM model

e23fc14 3 days ago

history blame contribute delete

1.82 kB

	{
	"architectures": [
	"ToneForCTC"
	],
	"ctc_loss_reduction": "mean",
	"ctc_zero_infinity": true,
	"decoder_params": {
	"feat_in": 384,
	"vocabulary": [
	"а",
	"б",
	"в",
	"г",
	"д",
	"е",
	"ё",
	"ж",
	"з",
	"и",
	"й",
	"к",
	"л",
	"м",
	"н",
	"о",
	"п",
	"р",
	"с",
	"т",
	"у",
	"ф",
	"х",
	"ц",
	"ч",
	"ш",
	"щ",
	"ъ",
	"ы",
	"ь",
	"э",
	"ю",
	"я",
	" "
	]
	},
	"encoder_params": {
	"chunk_size": 10,
	"conv_kernel_size": 31,
	"d_model": 384,
	"dropout": 0.1,
	"dropout_att": 0.1,
	"feat_in": 64,
	"ff_expansion_factor": 4,
	"mhsa_state_size": 30,
	"mhsa_stateless_layers": 14,
	"n_heads": 8,
	"n_layers": 16,
	"reduction_factor": 2,
	"reduction_kernel_size": 3,
	"reduction_position": 6,
	"rope_dim": 32,
	"should_recompute_att_scores": [
	true,
	false,
	false,
	false,
	false,
	false,
	false,
	true,
	false,
	false,
	false,
	false,
	false,
	false,
	true,
	true
	],
	"subsampling_conv_channels": [
	32,
	64
	],
	"subsampling_kernel_size": [
	[
	11,
	21
	],
	[
	11,
	11
	]
	],
	"subsampling_strides": [
	[
	1,
	1
	],
	[
	3,
	1
	]
	],
	"upsample_position": 14
	},
	"feature_extraction_params": {
	"n_fft": 160,
	"n_mels": 64,
	"preemphasis_coefficient": 0.97,
	"sample_rate": 8000,
	"window_size": 0.02,
	"window_stride": 0.01
	},
	"pad_token_id": 34,
	"torch_dtype": "float32",
	"transformers_version": "4.41.2"
	}