{ "vit_hidden_dim": 768, "vit_inter_dim": 3072, "vit_patch_size": 16, "vit_img_size": 512, "vit_n_heads": 12, "vit_dropout": 0.0, "vit_n_blocks": 12, "vit_ln_eps": 1e-06, "vit_cls_flag": false, "vit_model_type": "google/siglip2-base-patch16-512", "lm_hidden_dim": 960, "lm_inter_dim": 2560, "lm_rms_eps": 1e-05, "lm_re_base": 100000, "lm_max_position_embeddings": 8192, "lm_base_vocab_size": 49152, "extra_token_amount": 17, "lm_vocab_size": 49169, "lm_n_heads": 15, "lm_n_kv_heads": 5, "lm_dropout": 0.0, "lm_n_blocks": 32, "lm_attn_scaling": 1.0, "lm_max_length": 1024, "lm_use_tokens": false, "lm_tie_weights": true, "lm_model_type": "HuggingFaceTB/SmolLM2-360M-Instruct", "lm_tokenizer": "HuggingFaceTB/SmolLM2-360M-Instruct", "lm_chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", "mp_pixel_shuffle_factor": 4, "mp_image_token_length": 64, "max_img_size": 1024, "vlm_extra_tokens": { "image_token": "<|image|>", "r1c1": "<row_1_col_1>", "r1c2": "<row_1_col_2>", "r1c3": "<row_1_col_3>", "r1c4": "<row_1_col_4>", "r2c1": "<row_2_col_1>", "r2c2": "<row_2_col_2>", "r2c3": "<row_2_col_3>", "r2c4": "<row_2_col_4>", "r3c1": "<row_3_col_1>", "r3c2": "<row_3_col_2>", "r3c3": "<row_3_col_3>", "r3c4": "<row_3_col_4>", "r4c1": "<row_4_col_1>", "r4c2": "<row_4_col_2>", "r4c3": "<row_4_col_3>", "r4c4": "<row_4_col_4>" }, "vlm_load_backbone_weights": true, "vlm_checkpoint_path": "checkpoints", "hf_repo_name": "nanoVLM" }