MolmoAct-7B-D-0812 / config.json
hqfang's picture
Upload folder using huggingface_hub
2876330 verified
raw
history blame
3.75 kB
{
"adapter_config": {
"attention_dropout": 0.0,
"float32_attention": true,
"head_dim": 72,
"hidden_act": "silu",
"hidden_size": 1152,
"image_feature_dropout": 0.0,
"image_padding_embed": null,
"initializer_range": 0.02,
"intermediate_size": 18944,
"model_type": "",
"num_attention_heads": 16,
"num_key_value_heads": 16,
"residual_dropout": 0.0,
"text_hidden_size": 3584,
"vit_layers": [
-3,
-9
]
},
"architectures": [
"MolmoActForActionReasoning"
],
"auto_map": {
"AutoConfig": "configuration_molmoact.MolmoActConfig",
"AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning"
},
"image_patch_id": 152066,
"initializer_range": 0.02,
"llm_config": {
"additional_vocab_size": 128,
"attention_dropout": 0.0,
"embedding_dropout": 0.0,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"layer_norm_eps": 1e-06,
"max_position_embeddings": 4096,
"model_type": "molmoact_llm",
"norm_after": false,
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"qk_norm_type": "olmo",
"qkv_bias": true,
"residual_dropout": 0.0,
"rope_scaling": null,
"rope_theta": 1000000.0,
"use_cache": true,
"use_qk_norm": false,
"vocab_size": 152064
},
"model_type": "molmoact",
"n_action_bins": 256,
"norm_stats": {
"molmoact": {
"action": {
"max": [
0.06042003631591797,
0.09417290985584259,
0.07019275426864624,
0.2616892158985138,
0.11751057207584381,
0.16968433558940887,
1.0
],
"mean": [
0.0005706787342205644,
0.0002448957529850304,
-3.5987635783385485e-05,
0.00021597897284664214,
-0.0004896928439848125,
-0.000241481073317118,
0.5570635199546814
],
"min": [
-0.07434078305959702,
-0.07339745759963989,
-0.06539416313171387,
-0.1688285619020462,
-0.10289879888296127,
-0.2667275667190552,
0.0
],
"q01": [
-0.01538565568625927,
-0.021047022193670273,
-0.01688069850206375,
-0.044314172118902206,
-0.03890235349535942,
-0.04788423702120781,
0.0
],
"q99": [
0.014661382883787155,
0.026515591889619827,
0.021398313343524933,
0.04216696694493294,
0.03401297703385353,
0.04957397282123566,
1.0
],
"std": [
0.005207270849496126,
0.007506529800593853,
0.006415561307221651,
0.013248044066131115,
0.010928540490567684,
0.014873150736093521,
0.49715080857276917
]
},
"num_entries": 1560068
}
},
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.52.3",
"use_cache": true,
"vit_config": {
"attention_dropout": 0.0,
"float32_attention": true,
"head_dim": 72,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"image_default_input_size": [
378,
378
],
"image_num_pos": 729,
"image_patch_size": 14,
"initializer_range": 0.02,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "molmoact_vit",
"num_attention_heads": 16,
"num_hidden_layers": 27,
"num_key_value_heads": 16,
"patch_bias": true,
"pre_layernorm": false,
"residual_dropout": 0.0,
"use_cls_token": false
}
}