{ "architectures": [ "SiglipForImageClassification" ], "id2label": { "0": "aircraft_carrier", "1": "alarm_clock", "2": "ant", "3": "anvil", "4": "asparagus", "5": "axe", "6": "banana", "7": "basket", "8": "bathtub", "9": "bear", "10": "bee", "11": "bird", "12": "blackberry", "13": "blueberry", "14": "bottlecap", "15": "broccoli", "16": "bus", "17": "butterfly", "18": "cactus", "19": "cake", "20": "calculator", "21": "camel", "22": "camera", "23": "candle", "24": "cannon", "25": "canoe", "26": "carrot", "27": "castle", "28": "cat", "29": "ceiling_fan", "30": "cell_phone", "31": "cello", "32": "chair", "33": "chandelier", "34": "coffee_cup", "35": "compass", "36": "computer", "37": "cow", "38": "crab", "39": "crocodile", "40": "cruise_ship", "41": "dog", "42": "dolphin", "43": "dragon", "44": "drums", "45": "duck", "46": "dumbbell", "47": "elephant", "48": "eyeglasses", "49": "feather", "50": "fence", "51": "fish", "52": "flamingo", "53": "flower", "54": "foot", "55": "fork", "56": "frog", "57": "giraffe", "58": "goatee", "59": "grapes", "60": "guitar", "61": "hammer", "62": "helicopter", "63": "helmet", "64": "horse", "65": "kangaroo", "66": "lantern", "67": "laptop", "68": "leaf", "69": "lion", "70": "lipstick", "71": "lobster", "72": "microphone", "73": "monkey", "74": "mosquito", "75": "mouse", "76": "mug", "77": "mushroom", "78": "onion", "79": "panda", "80": "peanut", "81": "pear", "82": "peas", "83": "pencil", "84": "penguin", "85": "pig", "86": "pillow", "87": "pineapple", "88": "potato", "89": "power_outlet", "90": "purse", "91": "rabbit", "92": "raccoon", "93": "rhinoceros", "94": "rifle", "95": "saxophone", "96": "screwdriver", "97": "sea_turtle", "98": "see_saw", "99": "sheep", "100": "shoe", "101": "skateboard", "102": "snake", "103": "speedboat", "104": "spider", "105": "squirrel", "106": "strawberry", "107": "streetlight", "108": "string_bean", "109": "submarine", "110": "swan", "111": "table", "112": "teapot", "113": "teddy-bear", "114": "television", "115": "the_Eiffel_Tower", "116": "the_Great_Wall_of_China", "117": "tiger", "118": "toe", "119": "train", "120": "truck", "121": "umbrella", "122": "vase", "123": "watermelon", "124": "whale", "125": "zebra" }, "initializer_factor": 1.0, "label2id": { "aircraft_carrier": 0, "alarm_clock": 1, "ant": 2, "anvil": 3, "asparagus": 4, "axe": 5, "banana": 6, "basket": 7, "bathtub": 8, "bear": 9, "bee": 10, "bird": 11, "blackberry": 12, "blueberry": 13, "bottlecap": 14, "broccoli": 15, "bus": 16, "butterfly": 17, "cactus": 18, "cake": 19, "calculator": 20, "camel": 21, "camera": 22, "candle": 23, "cannon": 24, "canoe": 25, "carrot": 26, "castle": 27, "cat": 28, "ceiling_fan": 29, "cell_phone": 30, "cello": 31, "chair": 32, "chandelier": 33, "coffee_cup": 34, "compass": 35, "computer": 36, "cow": 37, "crab": 38, "crocodile": 39, "cruise_ship": 40, "dog": 41, "dolphin": 42, "dragon": 43, "drums": 44, "duck": 45, "dumbbell": 46, "elephant": 47, "eyeglasses": 48, "feather": 49, "fence": 50, "fish": 51, "flamingo": 52, "flower": 53, "foot": 54, "fork": 55, "frog": 56, "giraffe": 57, "goatee": 58, "grapes": 59, "guitar": 60, "hammer": 61, "helicopter": 62, "helmet": 63, "horse": 64, "kangaroo": 65, "lantern": 66, "laptop": 67, "leaf": 68, "lion": 69, "lipstick": 70, "lobster": 71, "microphone": 72, "monkey": 73, "mosquito": 74, "mouse": 75, "mug": 76, "mushroom": 77, "onion": 78, "panda": 79, "peanut": 80, "pear": 81, "peas": 82, "pencil": 83, "penguin": 84, "pig": 85, "pillow": 86, "pineapple": 87, "potato": 88, "power_outlet": 89, "purse": 90, "rabbit": 91, "raccoon": 92, "rhinoceros": 93, "rifle": 94, "saxophone": 95, "screwdriver": 96, "sea_turtle": 97, "see_saw": 98, "sheep": 99, "shoe": 100, "skateboard": 101, "snake": 102, "speedboat": 103, "spider": 104, "squirrel": 105, "strawberry": 106, "streetlight": 107, "string_bean": 108, "submarine": 109, "swan": 110, "table": 111, "teapot": 112, "teddy-bear": 113, "television": 114, "the_Eiffel_Tower": 115, "the_Great_Wall_of_China": 116, "tiger": 117, "toe": 118, "train": 119, "truck": 120, "umbrella": 121, "vase": 122, "watermelon": 123, "whale": 124, "zebra": 125 }, "model_type": "siglip", "problem_type": "single_label_classification", "text_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "max_position_embeddings": 64, "model_type": "siglip_text_model", "num_attention_heads": 12, "num_hidden_layers": 12, "projection_size": 768, "torch_dtype": "float32", "vocab_size": 256000 }, "torch_dtype": "float32", "transformers_version": "4.51.0.dev0", "vision_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "image_size": 224, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "model_type": "siglip_vision_model", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "patch_size": 16, "torch_dtype": "float32" } }