|
{ |
|
"adapter_config": { |
|
"attention_dropout": 0.0, |
|
"float32_attention": true, |
|
"head_dim": 72, |
|
"hidden_act": "silu", |
|
"hidden_size": 1152, |
|
"image_feature_dropout": 0.0, |
|
"image_padding_embed": null, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"model_type": "", |
|
"num_attention_heads": 16, |
|
"num_key_value_heads": 16, |
|
"residual_dropout": 0.0, |
|
"text_hidden_size": 3584, |
|
"vit_layers": [ |
|
-3, |
|
-9 |
|
] |
|
}, |
|
"architectures": [ |
|
"MolmoActForActionReasoning" |
|
], |
|
"auto_map": { |
|
"AutoConfig": "configuration_molmoact.MolmoActConfig", |
|
"AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" |
|
}, |
|
"image_patch_id": 152066, |
|
"initializer_range": 0.02, |
|
"llm_config": { |
|
"additional_vocab_size": 128, |
|
"attention_dropout": 0.0, |
|
"embedding_dropout": 0.0, |
|
"head_dim": 128, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"layer_norm_eps": 1e-06, |
|
"max_position_embeddings": 4096, |
|
"model_type": "molmoact_llm", |
|
"norm_after": false, |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"qk_norm_type": "olmo", |
|
"qkv_bias": true, |
|
"residual_dropout": 0.0, |
|
"rope_scaling": null, |
|
"rope_theta": 1000000.0, |
|
"use_cache": true, |
|
"use_qk_norm": false, |
|
"vocab_size": 152064 |
|
}, |
|
"model_type": "molmoact", |
|
"n_action_bins": 256, |
|
"norm_stats": { |
|
"fractal20220817_data": { |
|
"action": { |
|
"mask": [ |
|
true, |
|
true, |
|
true, |
|
true, |
|
true, |
|
true, |
|
false |
|
], |
|
"max": [ |
|
2.9984593391418457, |
|
22.09052848815918, |
|
2.7507524490356445, |
|
1.570636510848999, |
|
1.5321086645126343, |
|
1.5691522359848022, |
|
1.0 |
|
], |
|
"mean": [ |
|
0.006987582892179489, |
|
0.006265917327255011, |
|
-0.01262515690177679, |
|
0.04333311319351196, |
|
-0.005756212864071131, |
|
0.0009130256366916001, |
|
0.5354204773902893 |
|
], |
|
"min": [ |
|
-2.0204520225524902, |
|
-5.497899532318115, |
|
-2.031663417816162, |
|
-1.569917917251587, |
|
-1.569892168045044, |
|
-1.570419430732727, |
|
0.0 |
|
], |
|
"q01": [ |
|
-0.22453527510166169, |
|
-0.14820013284683228, |
|
-0.231589707583189, |
|
-0.3517994859814644, |
|
-0.4193011274933815, |
|
-0.43643461108207704, |
|
0.0 |
|
], |
|
"q99": [ |
|
0.17824687153100965, |
|
0.14938379630446405, |
|
0.21842354819178575, |
|
0.5892666035890578, |
|
0.35272657424211445, |
|
0.44796681255102094, |
|
1.0 |
|
], |
|
"std": [ |
|
0.0692116990685463, |
|
0.05970962345600128, |
|
0.07353084534406662, |
|
0.15610496699810028, |
|
0.13164450228214264, |
|
0.14593800902366638, |
|
0.497110515832901 |
|
] |
|
}, |
|
"num_trajectories": 87212, |
|
"num_transitions": 3786400, |
|
"proprio": { |
|
"max": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
], |
|
"mean": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
], |
|
"min": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
], |
|
"q01": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
], |
|
"q99": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
], |
|
"std": [ |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0, |
|
0.0 |
|
] |
|
} |
|
} |
|
}, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.52.3", |
|
"use_cache": true, |
|
"vit_config": { |
|
"attention_dropout": 0.0, |
|
"float32_attention": true, |
|
"head_dim": 72, |
|
"hidden_act": "gelu_pytorch_tanh", |
|
"hidden_size": 1152, |
|
"image_default_input_size": [ |
|
378, |
|
378 |
|
], |
|
"image_num_pos": 729, |
|
"image_patch_size": 14, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 4304, |
|
"layer_norm_eps": 1e-06, |
|
"model_type": "molmoact_vit", |
|
"num_attention_heads": 16, |
|
"num_hidden_layers": 27, |
|
"num_key_value_heads": 16, |
|
"patch_bias": true, |
|
"pre_layernorm": false, |
|
"residual_dropout": 0.0, |
|
"use_cls_token": false |
|
} |
|
} |