{
"architectures": [
"MMGPTStep1ForCausalLMV4"
],
"model_type": "mmgpt_step1_v2",
"hidden_size": 12288,
"intermediate_size": 31232,
"num_attention_heads": 96,
"num_attention_groups": 8,
"num_hidden_layers": 88,
"max_seq_len": 999999,
"vocab_size": 74752,
"rms_norm_eps": 1e-05,
"torch_dtype": "bfloat16",
"im_end_token": "<im_end>",
"im_patch_token": "<im_patch>",
"im_start_token": "<im_start>",
"image_token_len": 169,
"use_im_start_end": true,
"vision_select_layer": -1,
"understand_projector_stride": 2,
"vit_scale": 1.0,
"projector_bias": false,
"vision_tower_config": {
"hidden_size": 1792,
"output_hidden_size": 4096,
"image_size": 728,
"intermediate_size": 15360,
"num_attention_heads": 16,
"num_hidden_layers": 63,
"patch_size": 14
}
}