Image Classification
Transformers
Safetensors
English
siglip
Sketch-126-DomainNet
Sketch-126-DomainNet / config.json
prithivMLmods's picture
Upload folder using huggingface_hub
1b0e675 verified
{
"architectures": [
"SiglipForImageClassification"
],
"id2label": {
"0": "aircraft_carrier",
"1": "alarm_clock",
"2": "ant",
"3": "anvil",
"4": "asparagus",
"5": "axe",
"6": "banana",
"7": "basket",
"8": "bathtub",
"9": "bear",
"10": "bee",
"11": "bird",
"12": "blackberry",
"13": "blueberry",
"14": "bottlecap",
"15": "broccoli",
"16": "bus",
"17": "butterfly",
"18": "cactus",
"19": "cake",
"20": "calculator",
"21": "camel",
"22": "camera",
"23": "candle",
"24": "cannon",
"25": "canoe",
"26": "carrot",
"27": "castle",
"28": "cat",
"29": "ceiling_fan",
"30": "cell_phone",
"31": "cello",
"32": "chair",
"33": "chandelier",
"34": "coffee_cup",
"35": "compass",
"36": "computer",
"37": "cow",
"38": "crab",
"39": "crocodile",
"40": "cruise_ship",
"41": "dog",
"42": "dolphin",
"43": "dragon",
"44": "drums",
"45": "duck",
"46": "dumbbell",
"47": "elephant",
"48": "eyeglasses",
"49": "feather",
"50": "fence",
"51": "fish",
"52": "flamingo",
"53": "flower",
"54": "foot",
"55": "fork",
"56": "frog",
"57": "giraffe",
"58": "goatee",
"59": "grapes",
"60": "guitar",
"61": "hammer",
"62": "helicopter",
"63": "helmet",
"64": "horse",
"65": "kangaroo",
"66": "lantern",
"67": "laptop",
"68": "leaf",
"69": "lion",
"70": "lipstick",
"71": "lobster",
"72": "microphone",
"73": "monkey",
"74": "mosquito",
"75": "mouse",
"76": "mug",
"77": "mushroom",
"78": "onion",
"79": "panda",
"80": "peanut",
"81": "pear",
"82": "peas",
"83": "pencil",
"84": "penguin",
"85": "pig",
"86": "pillow",
"87": "pineapple",
"88": "potato",
"89": "power_outlet",
"90": "purse",
"91": "rabbit",
"92": "raccoon",
"93": "rhinoceros",
"94": "rifle",
"95": "saxophone",
"96": "screwdriver",
"97": "sea_turtle",
"98": "see_saw",
"99": "sheep",
"100": "shoe",
"101": "skateboard",
"102": "snake",
"103": "speedboat",
"104": "spider",
"105": "squirrel",
"106": "strawberry",
"107": "streetlight",
"108": "string_bean",
"109": "submarine",
"110": "swan",
"111": "table",
"112": "teapot",
"113": "teddy-bear",
"114": "television",
"115": "the_Eiffel_Tower",
"116": "the_Great_Wall_of_China",
"117": "tiger",
"118": "toe",
"119": "train",
"120": "truck",
"121": "umbrella",
"122": "vase",
"123": "watermelon",
"124": "whale",
"125": "zebra"
},
"initializer_factor": 1.0,
"label2id": {
"aircraft_carrier": 0,
"alarm_clock": 1,
"ant": 2,
"anvil": 3,
"asparagus": 4,
"axe": 5,
"banana": 6,
"basket": 7,
"bathtub": 8,
"bear": 9,
"bee": 10,
"bird": 11,
"blackberry": 12,
"blueberry": 13,
"bottlecap": 14,
"broccoli": 15,
"bus": 16,
"butterfly": 17,
"cactus": 18,
"cake": 19,
"calculator": 20,
"camel": 21,
"camera": 22,
"candle": 23,
"cannon": 24,
"canoe": 25,
"carrot": 26,
"castle": 27,
"cat": 28,
"ceiling_fan": 29,
"cell_phone": 30,
"cello": 31,
"chair": 32,
"chandelier": 33,
"coffee_cup": 34,
"compass": 35,
"computer": 36,
"cow": 37,
"crab": 38,
"crocodile": 39,
"cruise_ship": 40,
"dog": 41,
"dolphin": 42,
"dragon": 43,
"drums": 44,
"duck": 45,
"dumbbell": 46,
"elephant": 47,
"eyeglasses": 48,
"feather": 49,
"fence": 50,
"fish": 51,
"flamingo": 52,
"flower": 53,
"foot": 54,
"fork": 55,
"frog": 56,
"giraffe": 57,
"goatee": 58,
"grapes": 59,
"guitar": 60,
"hammer": 61,
"helicopter": 62,
"helmet": 63,
"horse": 64,
"kangaroo": 65,
"lantern": 66,
"laptop": 67,
"leaf": 68,
"lion": 69,
"lipstick": 70,
"lobster": 71,
"microphone": 72,
"monkey": 73,
"mosquito": 74,
"mouse": 75,
"mug": 76,
"mushroom": 77,
"onion": 78,
"panda": 79,
"peanut": 80,
"pear": 81,
"peas": 82,
"pencil": 83,
"penguin": 84,
"pig": 85,
"pillow": 86,
"pineapple": 87,
"potato": 88,
"power_outlet": 89,
"purse": 90,
"rabbit": 91,
"raccoon": 92,
"rhinoceros": 93,
"rifle": 94,
"saxophone": 95,
"screwdriver": 96,
"sea_turtle": 97,
"see_saw": 98,
"sheep": 99,
"shoe": 100,
"skateboard": 101,
"snake": 102,
"speedboat": 103,
"spider": 104,
"squirrel": 105,
"strawberry": 106,
"streetlight": 107,
"string_bean": 108,
"submarine": 109,
"swan": 110,
"table": 111,
"teapot": 112,
"teddy-bear": 113,
"television": 114,
"the_Eiffel_Tower": 115,
"the_Great_Wall_of_China": 116,
"tiger": 117,
"toe": 118,
"train": 119,
"truck": 120,
"umbrella": 121,
"vase": 122,
"watermelon": 123,
"whale": 124,
"zebra": 125
},
"model_type": "siglip",
"problem_type": "single_label_classification",
"text_config": {
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 768,
"intermediate_size": 3072,
"layer_norm_eps": 1e-06,
"max_position_embeddings": 64,
"model_type": "siglip_text_model",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"projection_size": 768,
"torch_dtype": "float32",
"vocab_size": 256000
},
"torch_dtype": "float32",
"transformers_version": "4.51.0.dev0",
"vision_config": {
"attention_dropout": 0.0,
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 768,
"image_size": 224,
"intermediate_size": 3072,
"layer_norm_eps": 1e-06,
"model_type": "siglip_vision_model",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"patch_size": 16,
"torch_dtype": "float32"
}
}