{ "architectures": [ "MMGPTStep1ForCausalLMV4" ], "model_type": "mmgpt_step1_v2", "hidden_size": 12288, "intermediate_size": 31232, "num_attention_heads": 96, "num_attention_groups": 8, "num_hidden_layers": 88, "max_seq_len": 999999, "vocab_size": 74752, "rms_norm_eps": 1e-05, "torch_dtype": "bfloat16", "im_end_token": "", "im_patch_token": "", "im_start_token": "", "image_token_len": 169, "use_im_start_end": true, "vision_select_layer": -1, "understand_projector_stride": 2, "vit_scale": 1.0, "projector_bias": false, "vision_tower_config": { "hidden_size": 1792, "output_hidden_size": 4096, "image_size": 728, "intermediate_size": 15360, "num_attention_heads": 16, "num_hidden_layers": 63, "patch_size": 14 } }