{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "sleeping", "1": "door", "2": "yellow", "3": "red and white", "4": "fern", "5": "on left", "6": "australia", "7": "dr pepper", "8": "not there", "9": "cage", "10": "airplane", "11": "11:00", "12": "game", "13": "flat", "14": "snake", "15": "windows", "16": "hard", "17": "on building", "18": "soccer", "19": "gone", "20": "birthday", "21": "sign", "22": "batting", "23": "bookshelf", "24": "cloudy", "25": "snowy", "26": "ice", "27": "white and red", "28": "market", "29": "beagle", "30": "pasta", "31": "front", "32": "white", "33": "16", "34": "baseball cap", "35": "listening", "36": "woods", "37": "young", "38": "afternoon", "39": "platform", "40": "wii", "41": "dog bed", "42": "toward", "43": "not in service", "44": "polo", "45": "boy", "46": "t shirt", "47": "necklace", "48": "pork", "49": "residential", "50": "away", "51": "on stove", "52": "mexican", "53": "skateboard", "54": "on floor", "55": "branch", "56": "yes", "57": "urban", "58": "dusk", "59": "books", "60": "bunk", "61": "phone", "62": "yellow and blue", "63": "living room", "64": "neither", "65": "hand", "66": "umbrellas", "67": "orange and white", "68": "paris", "69": "sedan", "70": "mirror", "71": "barn", "72": "jumping", "73": "design", "74": "picnic table", "75": "on bench", "76": "train tracks", "77": "laptop", "78": "angry", "79": "hair", "80": "lighting", "81": "pelican", "82": "decoration", "83": "plane", "84": "crossing", "85": "ocean", "86": "in field", "87": "fair", "88": "not sure", "89": "looking", "90": "ceiling", "91": "shirt", "92": "king", "93": "chains", "94": "red and green", "95": "brown and white", "96": "pointing", "97": "football", "98": "playing video games", "99": "double decker", "100": "out", "101": "in air", "102": "kite", "103": "cream", "104": "scarf", "105": "cap", "106": "ground", "107": "little", "108": "tennis ball", "109": "bedroom", "110": "bmw", "111": "mouth", "112": "blue", "113": "motorcycles", "114": "cow", "115": "zebra", "116": "clock", "117": "broccoli", "118": "sad", "119": "white and blue", "120": "outside", "121": "standing", "122": "transportation", "123": "dessert", "124": "baseball glove", "125": "jump", "126": "silver and black", "127": "dirty", "128": "dancing", "129": "video game", "130": "french", "131": "long", "132": "26", "133": "safari", "134": "cheese", "135": "13", "136": "unknown", "137": "10", "138": "plate", "139": "calico", "140": "sidewalk", "141": "soccer ball", "142": "not possible", "143": "electric", "144": "11:15", "145": "frosting", "146": "riding", "147": "email", "148": "strawberries", "149": "tower", "150": "go", "151": "player", "152": "falling", "153": "leather", "154": "black and white", "155": "visor", "156": "goggles", "157": "smile", "158": "12", "159": "large", "160": "north", "161": "khaki", "162": "wine", "163": "relaxing", "164": "pink and white", "165": "spoon", "166": "humans", "167": "cotton", "168": "roses", "169": "ceramic", "170": "cart", "171": "green and yellow", "172": "desert", "173": "united states", "174": "for balance", "175": "11:05", "176": "asia", "177": "few", "178": "drinking", "179": "gray", "180": "asphalt", "181": "dirt", "182": "donuts", "183": "sail", "184": "japan", "185": "tie", "186": "model", "187": "clock tower", "188": "hawaii", "189": "landing", "190": "collar", "191": "relish", "192": "winter", "193": "frame", "194": "cubs", "195": "oriental", "196": "14", "197": "wallet", "198": "no parking", "199": "all", "200": "waiting", "201": "german", "202": "big ben", "203": "africa", "204": "shopping", "205": "blueberries", "206": "station", "207": "plaid", "208": "linoleum", "209": "plain", "210": "batter", "211": "bus stop", "212": "5", "213": "boredom", "214": "squares", "215": "toothpicks", "216": "mud", "217": "stripes", "218": "burger", "219": "student", "220": "driving", "221": "toothpick", "222": "blue and yellow", "223": "string", "224": "ana", "225": "double", "226": "sky", "227": "women", "228": "jeep", "229": "curtains", "230": "spectators", "231": "single", "232": "in corner", "233": "fence", "234": "boat", "235": "honda", "236": "clay", "237": "silver", "238": "tracks", "239": "cell phone", "240": "weeds", "241": "glasses", "242": "oven", "243": "background", "244": "hp", "245": "don't know", "246": "bricks", "247": "2010", "248": "buffalo", "249": "flower", "250": "arrow", "251": "jockey", "252": "orange and yellow", "253": "happy", "254": "parked", "255": "feathers", "256": "man", "257": "twin", "258": "button up", "259": "no train", "260": "flag", "261": "serve", "262": "drinks", "263": "can't see", "264": "birds", "265": "to right", "266": "earring", "267": "no sign", "268": "snowboard", "269": "casserole", "270": "graffiti", "271": "microsoft", "272": "napkin", "273": "window sill", "274": "11:10", "275": "looking out window", "276": "off", "277": "vegetables", "278": "11:20", "279": "150", "280": "window", "281": "bread", "282": "working", "283": "duck", "284": "no dog", "285": "glove", "286": "on phone", "287": "ducks", "288": "fishing", "289": "blanket", "290": "resting", "291": "24", "292": "skateboarding", "293": "natural", "294": "chicago", "295": "lunch", "296": "checkered", "297": "color", "298": "road", "299": "dell", "300": "headband", "301": "jacket", "302": "canada", "303": "john", "304": "baked", "305": "rectangle", "306": "hamburger", "307": "lake", "308": "pacific", "309": "bananas", "310": "trick", "311": "7:35", "312": "green", "313": "10:10", "314": "low", "315": "white and black", "316": "hydrant", "317": "table", "318": "tulips", "319": "tree branch", "320": "lamp", "321": "stop", "322": "overcast", "323": "jeans", "324": "new york", "325": "years", "326": "corona", "327": "chicken", "328": "wii controller", "329": "usa", "330": "8:35", "331": "arabic", "332": "oval", "333": "60", "334": "rack", "335": "backhand", "336": "suit", "337": "on track", "338": "good", "339": "ollie", "340": "frisbees", "341": "dog", "342": "wall", "343": "security", "344": "38", "345": "sweatband", "346": "21", "347": "lines", "348": "42", "349": "tv", "350": "shelter", "351": "fresh", "352": "cleaning", "353": "england", "354": "skate", "355": "italy", "356": "dresser", "357": "cheesecake", "358": "gaming", "359": "avocado", "360": "sofa", "361": "elephants", "362": "breakfast", "363": "purple", "364": "screen", "365": "motor", "366": "hotel room", "367": "american", "368": "corner", "369": "22", "370": "4", "371": "playing", "372": "california", "373": "person", "374": "eating", "375": "peeing", "376": "squash", "377": "riding motorcycle", "378": "bored", "379": "town", "380": "right", "381": "paper", "382": "lying down", "383": "sitting", "384": "19", "385": "not here", "386": "lifeguard", "387": "15", "388": "labrador", "389": "car", "390": "bikes", "391": "on dresser", "392": "street", "393": "germany", "394": "writing", "395": "catching", "396": "big", "397": "20", "398": "head", "399": "tent", "400": "girl", "401": "boxes", "402": "lanyard", "403": "cook", "404": "subway", "405": "little girl", "406": "18", "407": "ivy", "408": "triangles", "409": "stop sign", "410": "britain", "411": "canopy", "412": "metal", "413": "shrimp", "414": "crane", "415": "bed", "416": "spanish", "417": "2012", "418": "closed", "419": "regular", "420": "1:10", "421": "shorts", "422": "2 years", "423": "love", "424": "blonde", "425": "tan", "426": "bob", "427": "white and green", "428": "yellow and green", "429": "1", "430": "red", "431": "fashion", "432": "center", "433": "truck", "434": "can't tell", "435": "lady", "436": "watching", "437": "they aren't", "438": "shadows", "439": "on road", "440": "commercial", "441": "3", "442": "clothes", "443": "logo", "444": "pot", "445": "pasture", "446": "giraffe", "447": "rubber", "448": "us", "449": "300", "450": "toilet", "451": "leaves", "452": "train", "453": "playing wii", "454": "reading", "455": "controller", "456": "antique", "457": "33", "458": "cats", "459": "on counter", "460": "comfort", "461": "under", "462": "ham", "463": "jp morgan", "464": "turkey", "465": "fruits", "466": "husky", "467": "purse", "468": "hat", "469": "red and blue", "470": "talking", "471": "dishes", "472": "in car", "473": "over", "474": "taking off", "475": "suv", "476": "hotel", "477": "nothing", "478": "brown", "479": "colored", "480": "lettuce", "481": "spots", "482": "cooking", "483": "asparagus", "484": "23", "485": "construction", "486": "onions", "487": "skyscraper", "488": "owner", "489": "living", "490": "fried", "491": "ducati", "492": "strawberry", "493": "clear", "494": "behind", "495": "salmon", "496": "animal", "497": "high", "498": "scrambled", "499": "male", "500": "100", "501": "huge", "502": "pm", "503": "night time", "504": "straight", "505": "ascending", "506": "ostrich", "507": "los angeles", "508": "sunny", "509": "drinking water", "510": "cumulus", "511": "skiing", "512": "lights", "513": "close", "514": "fall", "515": "smiling", "516": "night", "517": "on tower", "518": "gas", "519": "tour", "520": "old", "521": "corn", "522": "daisies", "523": "whipped cream", "524": "ski", "525": "plates", "526": "bicycle", "527": "basil", "528": "computer", "529": "red and silver", "530": "multi", "531": "mountain", "532": "catcher", "533": "skier", "534": "unsure", "535": "giraffes", "536": "couch", "537": "camping", "538": "cardinals", "539": "to left", "540": "shower", "541": "behind fence", "542": "utensils", "543": "7", "544": "railing", "545": "rocks", "546": "real", "547": "several", "548": "ramp", "549": "parking lot", "550": "cactus", "551": "chinese", "552": "nowhere", "553": "food", "554": "doughnut", "555": "bag", "556": "tiles", "557": "55", "558": "knife", "559": "messy", "560": "apples", "561": "shadow", "562": "spotted", "563": "upside down", "564": "hitting ball", "565": "skateboarder", "566": "quilt", "567": "ears", "568": "beach", "569": "on grass", "570": "20 ft", "571": "holding", "572": "snowboarder", "573": "bike", "574": "bull", "575": "motorbike", "576": "heat", "577": "backpack", "578": "navy", "579": "gold", "580": "slow", "581": "green and white", "582": "partly cloudy", "583": "skate park", "584": "bench", "585": "fruit", "586": "america", "587": "on bus", "588": "trailer", "589": "painting", "590": "7:45", "591": "playing baseball", "592": "track", "593": "opaque", "594": "spinach", "595": "socks", "596": "baby", "597": "swinging", "598": "paint", "599": "fast", "600": "straw", "601": "chain", "602": "in background", "603": "berries", "604": "cement", "605": "lots", "606": "not likely", "607": "beef", "608": "ketchup", "609": "air", "610": "advertisement", "611": "hundreds", "612": "day", "613": "in", "614": "luggage", "615": "flowers", "616": "both", "617": "serious", "618": "2013", "619": "freight", "620": "stand", "621": "hill", "622": "diamonds", "623": "suitcase", "624": "counter", "625": "unclear", "626": "kites", "627": "jackets", "628": "11", "629": "marble", "630": "players", "631": "donut", "632": "neon", "633": "windowsill", "634": "race", "635": "teal", "636": "cannot tell", "637": "carnations", "638": "kitchen", "639": "grass", "640": "london", "641": "fish", "642": "i don't know", "643": "trees", "644": "crosswalk", "645": "blue and white", "646": "child", "647": "not long", "648": "17", "649": "salad", "650": "snowboarding", "651": "2 feet", "652": "gun", "653": "sun", "654": "cows", "655": "75", "656": "by window", "657": "skis", "658": "catch", "659": "in bowl", "660": "fell", "661": "people", "662": "9", "663": "flip", "664": "rock", "665": "black and silver", "666": "monitor", "667": "cat", "668": "playing game", "669": "talking on phone", "670": "sleep", "671": "elephant", "672": "plastic", "673": "animals", "674": "rv", "675": "happiness", "676": "nighttime", "677": "hiking", "678": "water", "679": "chips", "680": "bear", "681": "walking", "682": "2", "683": "bus", "684": "solid", "685": "tree", "686": "throw", "687": "newspaper", "688": "yellow and orange", "689": "crown", "690": "wine bottle", "691": "shirts", "692": "many", "693": "yellow and red", "694": "handle", "695": "orchid", "696": "accident", "697": "wedding", "698": "not very", "699": "3 feet", "700": "furniture", "701": "foot", "702": "brushing", "703": "48", "704": "50", "705": "bottom", "706": "daytime", "707": "conductor", "708": "shoes", "709": "wine tasting", "710": "left side", "711": "west", "712": "board", "713": "small", "714": "marker", "715": "army", "716": "sunlight", "717": "mushroom", "718": "horse", "719": "spring", "720": "brick", "721": "baseball", "722": "rough", "723": "wood", "724": "passengers", "725": "tiled", "726": "first base", "727": "children", "728": "chopsticks", "729": "washing", "730": "1 year", "731": "normal", "732": "protection", "733": "fire hydrant", "734": "grapes", "735": "morning", "736": "round", "737": "over easy", "738": "facebook", "739": "bush", "740": "teddy bear", "741": "toyota", "742": "maroon", "743": "white and orange", "744": "wires", "745": "inside", "746": "picture", "747": "light", "748": "play", "749": "name tag", "750": "straight ahead", "751": "37", "752": "railroad crossing", "753": "tail", "754": "ice cream", "755": "wiimote", "756": "graduation", "757": "boys", "758": "blurry", "759": "abstract", "760": "poor", "761": "diamond", "762": "deer", "763": "cars", "764": "gray and white", "765": "lot", "766": "human", "767": "in street", "768": "54", "769": "red and black", "770": "roll", "771": "cupcake", "772": "dark", "773": "parking", "774": "pan", "775": "meat", "776": "8", "777": "dodgers", "778": "ear", "779": "1:50", "780": "pink", "781": "cranes", "782": "carrots", "783": "black", "784": "pond", "785": "park", "786": "9:35", "787": "field", "788": "pastries", "789": "glass", "790": "television", "791": "100 feet", "792": "descending", "793": "brushing her teeth", "794": "wii remote", "795": "banana", "796": "bun", "797": "towel", "798": "north america", "799": "looking at camera", "800": "in water", "801": "garlic", "802": "up", "803": "sub", "804": "power lines", "805": "back", "806": "red and gray", "807": "grazing", "808": "2000", "809": "bathroom", "810": "mountains", "811": "in sky", "812": "noon", "813": "dc", "814": "oil", "815": "cushion", "816": "sunset", "817": "open", "818": "o", "819": "pillow", "820": "asian", "821": "house", "822": "nobody", "823": "yellow and white", "824": "laying down", "825": "cargo", "826": "building", "827": "frosted", "828": "40", "829": "sink", "830": "6", "831": "2:00", "832": "blt", "833": "black and red", "834": "frog", "835": "hardwood", "836": "tennis", "837": "lilies", "838": "on street", "839": "plant", "840": "28", "841": "dock", "842": "mac", "843": "lg", "844": "wild", "845": "celery", "846": "omelet", "847": "woman", "848": "harley", "849": "net", "850": "cutting board", "851": "adult", "852": "exit", "853": "very", "854": "porcelain", "855": "savannah", "856": "on skateboard", "857": "black and yellow", "858": "first", "859": "british", "860": "italian", "861": "no man", "862": "orange", "863": "tired", "864": "curtain", "865": "at table", "866": "0", "867": "peanuts", "868": "river", "869": "square", "870": "camera", "871": "evening", "872": "fun", "873": "lays", "874": "photographer", "875": "dress", "876": "medium", "877": "left", "878": "tile", "879": "bike rack", "880": "silver and red", "881": "billabong", "882": "cup", "883": "east", "884": "frisbee", "885": "english", "886": "pizza", "887": "sandwich", "888": "broken", "889": "helmet", "890": "gray and black", "891": "hugging", "892": "80", "893": "queen", "894": "suitcases", "895": "fork", "896": "butterfly", "897": "bicycles", "898": "art", "899": "black white", "900": "me", "901": "am", "902": "bat", "903": "forest", "904": "street name", "905": "29", "906": "church", "907": "city", "908": "forward", "909": "mushrooms", "910": "bird", "911": "ball", "912": "female", "913": "steel", "914": "concrete", "915": "right hand", "916": "70", "917": "throwing", "918": "multiple", "919": "dots", "920": "umpire", "921": "jet", "922": "on", "923": "1:55", "924": "pots", "925": "steak", "926": "buildings", "927": "surfer", "928": "on wall", "929": "ahead", "930": "circle", "931": "wii remotes", "932": "day time", "933": "31", "934": "volkswagen", "935": "rain", "936": "remote", "937": "200", "938": "dinner", "939": "tuna", "940": "raining", "941": "onion", "942": "storm", "943": "floral", "944": "cake", "945": "pie", "946": "island", "947": "red and yellow", "948": "pans", "949": "milk", "950": "mustard", "951": "toilet paper", "952": "apple", "953": "hay", "954": "short", "955": "yamaha", "956": "palm", "957": "p", "958": "display", "959": "shade", "960": "coat", "961": "chair", "962": "25", "963": "f", "964": "down", "965": "30 mph", "966": "pole", "967": "hot dog", "968": "baseball bat", "969": "numbers", "970": "airport", "971": "rose", "972": "wind", "973": "beige", "974": "pictures", "975": "style", "976": "on his head", "977": "flying kite", "978": "riding bike", "979": "soup", "980": "bowl", "981": "motorcycle", "982": "summer", "983": "seagull", "984": "floor", "985": "30", "986": "india", "987": "warm", "988": "on water", "989": "roof", "990": "multicolored", "991": "zoo", "992": "cross", "993": "south", "994": "formica", "995": "rail", "996": "snow", "997": "forehand", "998": "outdoors", "999": "analog", "1000": "microwave", "1001": "daisy", "1002": "stew", "1003": "intersection", "1004": "pelicans", "1005": "right side", "1006": "72", "1007": "coffee", "1008": "nike", "1009": "playing video game", "1010": "mexico", "1011": "engine", "1012": "airplanes", "1013": "black and brown", "1014": "brushing teeth", "1015": "tabby", "1016": "white and brown", "1017": "rope", "1018": "striped", "1019": "full", "1020": "no", "1021": "above" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 866, "1": 429, "1 year": 730, "10": 137, "100": 500, "100 feet": 791, "10:10": 313, "11": 628, "11:00": 11, "11:05": 175, "11:10": 274, "11:15": 144, "11:20": 278, "12": 158, "13": 135, "14": 196, "15": 387, "150": 279, "16": 33, "17": 648, "18": 406, "19": 384, "1:10": 420, "1:50": 779, "1:55": 923, "2": 682, "2 feet": 651, "2 years": 422, "20": 397, "20 ft": 570, "200": 937, "2000": 808, "2010": 247, "2012": 417, "2013": 618, "21": 346, "22": 369, "23": 484, "24": 291, "25": 962, "26": 132, "28": 840, "29": 905, "2:00": 831, "3": 441, "3 feet": 699, "30": 985, "30 mph": 965, "300": 449, "31": 933, "33": 457, "37": 751, "38": 344, "4": 370, "40": 828, "42": 348, "48": 703, "5": 212, "50": 704, "54": 768, "55": 557, "6": 830, "60": 333, "7": 543, "70": 916, "72": 1006, "75": 655, "7:35": 311, "7:45": 590, "8": 776, "80": 892, "8:35": 330, "9": 662, "9:35": 786, "above": 1021, "abstract": 759, "accident": 696, "adult": 851, "advertisement": 610, "africa": 203, "afternoon": 38, "ahead": 929, "air": 609, "airplane": 10, "airplanes": 1012, "airport": 970, "all": 199, "am": 901, "america": 586, "american": 367, "ana": 224, "analog": 999, "angry": 78, "animal": 496, "animals": 673, "antique": 456, "apple": 952, "apples": 560, "arabic": 331, "army": 715, "arrow": 250, "art": 898, "ascending": 505, "asia": 176, "asian": 820, "asparagus": 483, "asphalt": 180, "at table": 865, "australia": 6, "avocado": 359, "away": 50, "baby": 596, "back": 805, "background": 243, "backhand": 335, "backpack": 577, "bag": 555, "baked": 304, "ball": 911, "banana": 795, "bananas": 309, "barn": 71, "baseball": 721, "baseball bat": 968, "baseball cap": 34, "baseball glove": 124, "basil": 527, "bat": 902, "bathroom": 809, "batter": 210, "batting": 22, "beach": 568, "beagle": 29, "bear": 680, "bed": 415, "bedroom": 109, "beef": 607, "behind": 494, "behind fence": 541, "beige": 973, "bench": 584, "berries": 603, "bicycle": 526, "bicycles": 897, "big": 396, "big ben": 202, "bike": 573, "bike rack": 879, "bikes": 390, "billabong": 881, "bird": 910, "birds": 264, "birthday": 20, "black": 783, "black and brown": 1013, "black and red": 833, "black and silver": 665, "black and white": 154, "black and yellow": 857, "black white": 899, "blanket": 289, "blonde": 424, "blt": 832, "blue": 112, "blue and white": 645, "blue and yellow": 222, "blueberries": 205, "blurry": 758, "bmw": 110, "board": 712, "boat": 234, "bob": 426, "books": 59, "bookshelf": 23, "bored": 378, "boredom": 213, "both": 616, "bottom": 705, "bowl": 980, "boxes": 401, "boy": 45, "boys": 757, "branch": 55, "bread": 281, "breakfast": 362, "brick": 720, "bricks": 246, "britain": 410, "british": 859, "broccoli": 117, "broken": 888, "brown": 478, "brown and white": 95, "brushing": 702, "brushing her teeth": 793, "brushing teeth": 1014, "buffalo": 248, "building": 826, "buildings": 926, "bull": 574, "bun": 796, "bunk": 60, "burger": 218, "bus": 683, "bus stop": 211, "bush": 739, "butterfly": 896, "button up": 258, "by window": 656, "cactus": 550, "cage": 9, "cake": 944, "calico": 139, "california": 372, "camera": 870, "camping": 537, "can't see": 263, "can't tell": 434, "canada": 302, "cannot tell": 636, "canopy": 411, "cap": 105, "car": 389, "cardinals": 538, "cargo": 825, "carnations": 637, "carrots": 782, "cars": 763, "cart": 170, "casserole": 269, "cat": 667, "catch": 658, "catcher": 532, "catching": 395, "cats": 458, "ceiling": 90, "celery": 845, "cell phone": 239, "cement": 604, "center": 432, "ceramic": 169, "chain": 601, "chains": 93, "chair": 961, "checkered": 296, "cheese": 134, "cheesecake": 357, "chicago": 294, "chicken": 327, "child": 646, "children": 727, "chinese": 551, "chips": 679, "chopsticks": 728, "church": 906, "circle": 930, "city": 907, "clay": 236, "cleaning": 352, "clear": 493, "clock": 116, "clock tower": 187, "close": 513, "closed": 418, "clothes": 442, "cloudy": 24, "coat": 960, "coffee": 1007, "collar": 190, "color": 297, "colored": 479, "comfort": 460, "commercial": 440, "computer": 528, "concrete": 914, "conductor": 707, "construction": 485, "controller": 455, "cook": 403, "cooking": 482, "corn": 521, "corner": 368, "corona": 326, "cotton": 167, "couch": 536, "counter": 624, "cow": 114, "cows": 654, "crane": 414, "cranes": 781, "cream": 103, "cross": 992, "crossing": 84, "crosswalk": 644, "crown": 689, "cubs": 194, "cumulus": 510, "cup": 882, "cupcake": 771, "curtain": 864, "curtains": 229, "cushion": 815, "cutting board": 850, "daisies": 522, "daisy": 1001, "dancing": 128, "dark": 772, "day": 612, "day time": 932, "daytime": 706, "dc": 813, "decoration": 82, "deer": 762, "dell": 299, "descending": 792, "desert": 172, "design": 73, "dessert": 123, "diamond": 761, "diamonds": 622, "dinner": 938, "dirt": 181, "dirty": 127, "dishes": 471, "display": 958, "dock": 841, "dodgers": 777, "dog": 341, "dog bed": 41, "don't know": 245, "donut": 631, "donuts": 182, "door": 1, "dots": 919, "double": 225, "double decker": 99, "doughnut": 554, "down": 964, "dr pepper": 7, "dress": 875, "dresser": 356, "drinking": 178, "drinking water": 509, "drinks": 262, "driving": 220, "ducati": 491, "duck": 283, "ducks": 287, "dusk": 58, "ear": 778, "earring": 266, "ears": 567, "east": 883, "eating": 374, "electric": 143, "elephant": 671, "elephants": 361, "email": 147, "engine": 1011, "england": 353, "english": 885, "evening": 871, "exit": 852, "f": 963, "facebook": 738, "fair": 87, "fall": 514, "falling": 152, "fashion": 431, "fast": 599, "feathers": 255, "fell": 660, "female": 912, "fence": 233, "fern": 4, "few": 177, "field": 787, "fire hydrant": 733, "first": 858, "first base": 726, "fish": 641, "fishing": 288, "flag": 260, "flat": 13, "flip": 663, "floor": 984, "floral": 943, "flower": 249, "flowers": 615, "flying kite": 977, "food": 553, "foot": 701, "football": 97, "for balance": 174, "forehand": 997, "forest": 903, "fork": 895, "formica": 994, "forward": 908, "frame": 193, "freight": 619, "french": 130, "fresh": 351, "fried": 490, "frisbee": 884, "frisbees": 340, "frog": 834, "front": 31, "frosted": 827, "frosting": 145, "fruit": 585, "fruits": 465, "full": 1019, "fun": 872, "furniture": 700, "game": 12, "gaming": 358, "garlic": 801, "gas": 518, "german": 201, "germany": 393, "giraffe": 446, "giraffes": 535, "girl": 400, "glass": 789, "glasses": 241, "glove": 285, "go": 150, "goggles": 156, "gold": 579, "gone": 19, "good": 338, "graduation": 756, "graffiti": 270, "grapes": 734, "grass": 639, "gray": 179, "gray and black": 890, "gray and white": 764, "grazing": 807, "green": 312, "green and white": 581, "green and yellow": 171, "ground": 106, "gun": 652, "hair": 79, "ham": 462, "hamburger": 306, "hand": 65, "handle": 694, "happiness": 675, "happy": 253, "hard": 16, "hardwood": 835, "harley": 848, "hat": 468, "hawaii": 188, "hay": 953, "head": 398, "headband": 300, "heat": 576, "helmet": 889, "high": 497, "hiking": 677, "hill": 621, "hitting ball": 564, "holding": 571, "honda": 235, "horse": 718, "hot dog": 967, "hotel": 476, "hotel room": 366, "house": 821, "hp": 244, "huge": 501, "hugging": 891, "human": 766, "humans": 166, "hundreds": 611, "husky": 466, "hydrant": 316, "i don't know": 642, "ice": 26, "ice cream": 754, "in": 613, "in air": 101, "in background": 602, "in bowl": 659, "in car": 472, "in corner": 232, "in field": 86, "in sky": 811, "in street": 767, "in water": 800, "india": 986, "inside": 745, "intersection": 1003, "island": 946, "italian": 860, "italy": 355, "ivy": 407, "jacket": 301, "jackets": 627, "japan": 184, "jeans": 323, "jeep": 228, "jet": 921, "jockey": 251, "john": 303, "jp morgan": 463, "jump": 125, "jumping": 72, "ketchup": 608, "khaki": 161, "king": 92, "kitchen": 638, "kite": 102, "kites": 626, "knife": 558, "labrador": 388, "lady": 435, "lake": 307, "lamp": 320, "landing": 189, "lanyard": 402, "laptop": 77, "large": 159, "laying down": 824, "lays": 873, "leather": 153, "leaves": 451, "left": 877, "left side": 710, "lettuce": 480, "lg": 843, "lifeguard": 386, "light": 747, "lighting": 80, "lights": 512, "lilies": 837, "lines": 347, "linoleum": 208, "listening": 35, "little": 107, "little girl": 405, "living": 489, "living room": 63, "logo": 443, "london": 640, "long": 131, "looking": 89, "looking at camera": 799, "looking out window": 275, "los angeles": 507, "lot": 765, "lots": 605, "love": 423, "low": 314, "luggage": 614, "lunch": 295, "lying down": 382, "mac": 842, "male": 499, "man": 256, "many": 692, "marble": 629, "marker": 714, "market": 28, "maroon": 742, "me": 900, "meat": 775, "medium": 876, "messy": 559, "metal": 412, "mexican": 52, "mexico": 1010, "microsoft": 271, "microwave": 1000, "milk": 949, "mirror": 70, "model": 186, "monitor": 666, "morning": 735, "motor": 365, "motorbike": 575, "motorcycle": 981, "motorcycles": 113, "mountain": 531, "mountains": 810, "mouth": 111, "mud": 216, "multi": 530, "multicolored": 990, "multiple": 918, "mushroom": 717, "mushrooms": 909, "mustard": 950, "name tag": 749, "napkin": 272, "natural": 293, "navy": 578, "necklace": 47, "neither": 64, "neon": 632, "net": 849, "new york": 324, "newspaper": 687, "night": 516, "night time": 503, "nighttime": 676, "nike": 1008, "no": 1020, "no dog": 284, "no man": 861, "no parking": 198, "no sign": 267, "no train": 259, "nobody": 822, "noon": 812, "normal": 731, "north": 160, "north america": 798, "not here": 385, "not in service": 43, "not likely": 606, "not long": 647, "not possible": 142, "not sure": 88, "not there": 8, "not very": 698, "nothing": 477, "nowhere": 552, "numbers": 969, "o": 818, "ocean": 85, "off": 276, "oil": 814, "old": 520, "ollie": 339, "omelet": 846, "on": 922, "on bench": 75, "on building": 17, "on bus": 587, "on counter": 459, "on dresser": 391, "on floor": 54, "on grass": 569, "on his head": 976, "on left": 5, "on phone": 286, "on road": 439, "on skateboard": 856, "on stove": 51, "on street": 838, "on tower": 517, "on track": 337, "on wall": 928, "on water": 988, "onion": 941, "onions": 486, "opaque": 593, "open": 817, "orange": 862, "orange and white": 67, "orange and yellow": 252, "orchid": 695, "oriental": 195, "ostrich": 506, "out": 100, "outdoors": 998, "outside": 120, "oval": 332, "oven": 242, "over": 473, "over easy": 737, "overcast": 322, "owner": 488, "p": 957, "pacific": 308, "paint": 598, "painting": 589, "palm": 956, "pan": 774, "pans": 948, "paper": 381, "paris": 68, "park": 785, "parked": 254, "parking": 773, "parking lot": 549, "partly cloudy": 582, "passengers": 724, "pasta": 30, "pastries": 788, "pasture": 445, "peanuts": 867, "peeing": 375, "pelican": 81, "pelicans": 1004, "people": 661, "person": 373, "phone": 61, "photographer": 874, "picnic table": 74, "picture": 746, "pictures": 974, "pie": 945, "pillow": 819, "pink": 780, "pink and white": 164, "pizza": 886, "plaid": 207, "plain": 209, "plane": 83, "plant": 839, "plastic": 672, "plate": 138, "plates": 525, "platform": 39, "play": 748, "player": 151, "players": 630, "playing": 371, "playing baseball": 591, "playing game": 668, "playing video game": 1009, "playing video games": 98, "playing wii": 453, "pm": 502, "pointing": 96, "pole": 966, "polo": 44, "pond": 784, "poor": 760, "porcelain": 854, "pork": 48, "pot": 444, "pots": 924, "power lines": 804, "protection": 732, "purple": 363, "purse": 467, "queen": 893, "quilt": 566, "race": 634, "rack": 334, "rail": 995, "railing": 544, "railroad crossing": 752, "rain": 935, "raining": 940, "ramp": 548, "reading": 454, "real": 546, "rectangle": 305, "red": 430, "red and black": 769, "red and blue": 469, "red and gray": 806, "red and green": 94, "red and silver": 529, "red and white": 3, "red and yellow": 947, "regular": 419, "relaxing": 163, "relish": 191, "remote": 936, "residential": 49, "resting": 290, "riding": 146, "riding bike": 978, "riding motorcycle": 377, "right": 380, "right hand": 915, "right side": 1005, "river": 868, "road": 298, "rock": 664, "rocks": 545, "roll": 770, "roof": 989, "rope": 1017, "rose": 971, "roses": 168, "rough": 722, "round": 736, "rubber": 447, "rv": 674, "sad": 118, "safari": 133, "sail": 183, "salad": 649, "salmon": 495, "sandwich": 887, "savannah": 855, "scarf": 104, "scrambled": 498, "screen": 364, "seagull": 983, "security": 343, "sedan": 69, "serious": 617, "serve": 261, "several": 547, "shade": 959, "shadow": 561, "shadows": 438, "shelter": 350, "shirt": 91, "shirts": 691, "shoes": 708, "shopping": 204, "short": 954, "shorts": 421, "shower": 540, "shrimp": 413, "sidewalk": 140, "sign": 21, "silver": 237, "silver and black": 126, "silver and red": 880, "single": 231, "sink": 829, "sitting": 383, "skate": 354, "skate park": 583, "skateboard": 53, "skateboarder": 565, "skateboarding": 292, "ski": 524, "skier": 533, "skiing": 511, "skis": 657, "sky": 226, "skyscraper": 487, "sleep": 670, "sleeping": 0, "slow": 580, "small": 713, "smile": 157, "smiling": 515, "snake": 14, "snow": 996, "snowboard": 268, "snowboarder": 572, "snowboarding": 650, "snowy": 25, "soccer": 18, "soccer ball": 141, "socks": 595, "sofa": 360, "solid": 684, "soup": 979, "south": 993, "spanish": 416, "spectators": 230, "spinach": 594, "spoon": 165, "spots": 481, "spotted": 562, "spring": 719, "square": 869, "squares": 214, "squash": 376, "stand": 620, "standing": 121, "station": 206, "steak": 925, "steel": 913, "stew": 1002, "stop": 321, "stop sign": 409, "storm": 942, "straight": 504, "straight ahead": 750, "straw": 600, "strawberries": 148, "strawberry": 492, "street": 392, "street name": 904, "string": 223, "striped": 1018, "stripes": 217, "student": 219, "style": 975, "sub": 803, "subway": 404, "suit": 336, "suitcase": 623, "suitcases": 894, "summer": 982, "sun": 653, "sunlight": 716, "sunny": 508, "sunset": 816, "surfer": 927, "suv": 475, "sweatband": 345, "swinging": 597, "t shirt": 46, "tabby": 1015, "table": 317, "tail": 753, "taking off": 474, "talking": 470, "talking on phone": 669, "tan": 425, "teal": 635, "teddy bear": 740, "television": 790, "tennis": 836, "tennis ball": 108, "tent": 399, "they aren't": 437, "throw": 686, "throwing": 917, "tie": 185, "tile": 878, "tiled": 725, "tiles": 556, "tired": 863, "to left": 539, "to right": 265, "toilet": 450, "toilet paper": 951, "toothpick": 221, "toothpicks": 215, "tour": 519, "toward": 42, "towel": 797, "tower": 149, "town": 379, "toyota": 741, "track": 592, "tracks": 238, "trailer": 588, "train": 452, "train tracks": 76, "transportation": 122, "tree": 685, "tree branch": 319, "trees": 643, "triangles": 408, "trick": 310, "truck": 433, "tulips": 318, "tuna": 939, "turkey": 464, "tv": 349, "twin": 257, "umbrellas": 66, "umpire": 920, "unclear": 625, "under": 461, "united states": 173, "unknown": 136, "unsure": 534, "up": 802, "upside down": 563, "urban": 57, "us": 448, "usa": 329, "utensils": 542, "vegetables": 277, "very": 853, "video game": 129, "visor": 155, "volkswagen": 934, "waiting": 200, "walking": 681, "wall": 342, "wallet": 197, "warm": 987, "washing": 729, "watching": 436, "water": 678, "wedding": 697, "weeds": 240, "west": 711, "whipped cream": 523, "white": 32, "white and black": 315, "white and blue": 119, "white and brown": 1016, "white and green": 427, "white and orange": 743, "white and red": 27, "wii": 40, "wii controller": 328, "wii remote": 794, "wii remotes": 931, "wiimote": 755, "wild": 844, "wind": 972, "window": 280, "window sill": 273, "windows": 15, "windowsill": 633, "wine": 162, "wine bottle": 690, "wine tasting": 709, "winter": 192, "wires": 744, "woman": 847, "women": 227, "wood": 723, "woods": 36, "working": 282, "writing": 394, "yamaha": 955, "years": 325, "yellow": 2, "yellow and blue": 62, "yellow and green": 428, "yellow and orange": 688, "yellow and red": 693, "yellow and white": 823, "yes": 56, "young": 37, "zebra": 115, "zoo": 991 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.33.1", "type_vocab_size": 2, "vocab_size": 30522 }