{ "adapter_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "silu", "hidden_size": 1152, "image_feature_dropout": 0.0, "image_padding_embed": null, "initializer_range": 0.02, "intermediate_size": 18944, "model_type": "", "num_attention_heads": 16, "num_key_value_heads": 16, "residual_dropout": 0.0, "text_hidden_size": 3584, "vit_layers": [ -3, -9 ] }, "architectures": [ "MolmoActForActionReasoning" ], "auto_map": { "AutoConfig": "configuration_molmoact.MolmoActConfig", "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" }, "image_patch_id": 152066, "initializer_range": 0.02, "llm_config": { "additional_vocab_size": 128, "attention_dropout": 0.0, "embedding_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "layer_norm_eps": 1e-06, "max_position_embeddings": 4096, "model_type": "molmoact_llm", "norm_after": false, "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "qk_norm_type": "olmo", "qkv_bias": true, "residual_dropout": 0.0, "rope_scaling": null, "rope_theta": 1000000.0, "use_cache": true, "use_qk_norm": false, "vocab_size": 152064 }, "model_type": "molmoact", "n_action_bins": 256, "norm_stats": { "molmoact": { "action": { "max": [ 0.06042003631591797, 0.09417290985584259, 0.07019275426864624, 0.2616892158985138, 0.11751057207584381, 0.16968433558940887, 1.0 ], "mean": [ 0.0005706787342205644, 0.0002448957529850304, -3.5987635783385485e-05, 0.00021597897284664214, -0.0004896928439848125, -0.000241481073317118, 0.5570635199546814 ], "min": [ -0.07434078305959702, -0.07339745759963989, -0.06539416313171387, -0.1688285619020462, -0.10289879888296127, -0.2667275667190552, 0.0 ], "q01": [ -0.01538565568625927, -0.021047022193670273, -0.01688069850206375, -0.044314172118902206, -0.03890235349535942, -0.04788423702120781, 0.0 ], "q99": [ 0.014661382883787155, 0.026515591889619827, 0.021398313343524933, 0.04216696694493294, 0.03401297703385353, 0.04957397282123566, 1.0 ], "std": [ 0.005207270849496126, 0.007506529800593853, 0.006415561307221651, 0.013248044066131115, 0.010928540490567684, 0.014873150736093521, 0.49715080857276917 ] }, "num_entries": 1560068 } }, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.52.3", "use_cache": true, "vit_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_default_input_size": [ 378, 378 ], "image_num_pos": 729, "image_patch_size": 14, "initializer_range": 0.02, "intermediate_size": 4304, "layer_norm_eps": 1e-06, "model_type": "molmoact_vit", "num_attention_heads": 16, "num_hidden_layers": 27, "num_key_value_heads": 16, "patch_bias": true, "pre_layernorm": false, "residual_dropout": 0.0, "use_cls_token": false } }