{ "architectures": [ "MMGPTStep1ForCausalLMV4" ], "model_type": "mmgpt_step1_v2", "hidden_size": 12288, "intermediate_size": 31232, "num_attention_heads": 96, "num_attention_groups": 8, "num_hidden_layers": 88, "max_seq_len": 999999, "vocab_size": 74752, "rms_norm_eps": 1e-05, "torch_dtype": "bfloat16", "im_end_token": "", "im_patch_token": "", "im_start_token": "", "image_token_len": 169, "use_im_start_end": true, "vision_select_layer": -1, "understand_projector_stride": 2, "vit_scale": 1.0, "projector_bias": false, "vision_tower_config": { "hidden_size": 1792, "output_hidden_size": 4096, "image_size": 728, "intermediate_size": 15360, "num_attention_heads": 16, "num_hidden_layers": 63, "patch_size": 14 } }