Vilt_fine_tune_2000 / config.json
aqachun's picture
Training in progress, step 400
f603b5a
{
"_name_or_path": "dandelin/vilt-b32-mlm",
"architectures": [
"ViltForQuestionAnswering"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "sleeping",
"1": "door",
"2": "yellow",
"3": "red and white",
"4": "fern",
"5": "on left",
"6": "australia",
"7": "dr pepper",
"8": "not there",
"9": "cage",
"10": "airplane",
"11": "11:00",
"12": "game",
"13": "flat",
"14": "snake",
"15": "windows",
"16": "hard",
"17": "on building",
"18": "soccer",
"19": "gone",
"20": "birthday",
"21": "sign",
"22": "batting",
"23": "bookshelf",
"24": "cloudy",
"25": "snowy",
"26": "ice",
"27": "white and red",
"28": "market",
"29": "beagle",
"30": "pasta",
"31": "front",
"32": "white",
"33": "16",
"34": "baseball cap",
"35": "listening",
"36": "woods",
"37": "young",
"38": "afternoon",
"39": "platform",
"40": "wii",
"41": "dog bed",
"42": "toward",
"43": "not in service",
"44": "polo",
"45": "boy",
"46": "t shirt",
"47": "necklace",
"48": "pork",
"49": "residential",
"50": "away",
"51": "on stove",
"52": "mexican",
"53": "skateboard",
"54": "on floor",
"55": "branch",
"56": "yes",
"57": "urban",
"58": "dusk",
"59": "books",
"60": "bunk",
"61": "phone",
"62": "yellow and blue",
"63": "living room",
"64": "neither",
"65": "hand",
"66": "umbrellas",
"67": "orange and white",
"68": "paris",
"69": "sedan",
"70": "mirror",
"71": "barn",
"72": "jumping",
"73": "design",
"74": "picnic table",
"75": "on bench",
"76": "train tracks",
"77": "laptop",
"78": "angry",
"79": "hair",
"80": "lighting",
"81": "pelican",
"82": "decoration",
"83": "plane",
"84": "crossing",
"85": "ocean",
"86": "in field",
"87": "fair",
"88": "not sure",
"89": "looking",
"90": "ceiling",
"91": "shirt",
"92": "king",
"93": "chains",
"94": "red and green",
"95": "brown and white",
"96": "pointing",
"97": "football",
"98": "playing video games",
"99": "double decker",
"100": "out",
"101": "in air",
"102": "kite",
"103": "cream",
"104": "scarf",
"105": "cap",
"106": "ground",
"107": "little",
"108": "tennis ball",
"109": "bedroom",
"110": "bmw",
"111": "mouth",
"112": "blue",
"113": "motorcycles",
"114": "cow",
"115": "zebra",
"116": "clock",
"117": "broccoli",
"118": "sad",
"119": "white and blue",
"120": "outside",
"121": "standing",
"122": "transportation",
"123": "dessert",
"124": "baseball glove",
"125": "jump",
"126": "silver and black",
"127": "dirty",
"128": "dancing",
"129": "video game",
"130": "french",
"131": "long",
"132": "26",
"133": "safari",
"134": "cheese",
"135": "13",
"136": "unknown",
"137": "10",
"138": "plate",
"139": "calico",
"140": "sidewalk",
"141": "soccer ball",
"142": "not possible",
"143": "electric",
"144": "11:15",
"145": "frosting",
"146": "riding",
"147": "email",
"148": "strawberries",
"149": "tower",
"150": "go",
"151": "player",
"152": "falling",
"153": "leather",
"154": "black and white",
"155": "visor",
"156": "goggles",
"157": "smile",
"158": "12",
"159": "large",
"160": "north",
"161": "khaki",
"162": "wine",
"163": "relaxing",
"164": "pink and white",
"165": "spoon",
"166": "humans",
"167": "cotton",
"168": "roses",
"169": "ceramic",
"170": "cart",
"171": "green and yellow",
"172": "desert",
"173": "united states",
"174": "for balance",
"175": "11:05",
"176": "asia",
"177": "few",
"178": "drinking",
"179": "gray",
"180": "asphalt",
"181": "dirt",
"182": "donuts",
"183": "sail",
"184": "japan",
"185": "tie",
"186": "model",
"187": "clock tower",
"188": "hawaii",
"189": "landing",
"190": "collar",
"191": "relish",
"192": "winter",
"193": "frame",
"194": "cubs",
"195": "oriental",
"196": "14",
"197": "wallet",
"198": "no parking",
"199": "all",
"200": "waiting",
"201": "german",
"202": "big ben",
"203": "africa",
"204": "shopping",
"205": "blueberries",
"206": "station",
"207": "plaid",
"208": "linoleum",
"209": "plain",
"210": "batter",
"211": "bus stop",
"212": "5",
"213": "boredom",
"214": "squares",
"215": "toothpicks",
"216": "mud",
"217": "stripes",
"218": "burger",
"219": "student",
"220": "driving",
"221": "toothpick",
"222": "blue and yellow",
"223": "string",
"224": "ana",
"225": "double",
"226": "sky",
"227": "women",
"228": "jeep",
"229": "curtains",
"230": "spectators",
"231": "single",
"232": "in corner",
"233": "fence",
"234": "boat",
"235": "honda",
"236": "clay",
"237": "silver",
"238": "tracks",
"239": "cell phone",
"240": "weeds",
"241": "glasses",
"242": "oven",
"243": "background",
"244": "hp",
"245": "don't know",
"246": "bricks",
"247": "2010",
"248": "buffalo",
"249": "flower",
"250": "arrow",
"251": "jockey",
"252": "orange and yellow",
"253": "happy",
"254": "parked",
"255": "feathers",
"256": "man",
"257": "twin",
"258": "button up",
"259": "no train",
"260": "flag",
"261": "serve",
"262": "drinks",
"263": "can't see",
"264": "birds",
"265": "to right",
"266": "earring",
"267": "no sign",
"268": "snowboard",
"269": "casserole",
"270": "graffiti",
"271": "microsoft",
"272": "napkin",
"273": "window sill",
"274": "11:10",
"275": "looking out window",
"276": "off",
"277": "vegetables",
"278": "11:20",
"279": "150",
"280": "window",
"281": "bread",
"282": "working",
"283": "duck",
"284": "no dog",
"285": "glove",
"286": "on phone",
"287": "ducks",
"288": "fishing",
"289": "blanket",
"290": "resting",
"291": "24",
"292": "skateboarding",
"293": "natural",
"294": "chicago",
"295": "lunch",
"296": "checkered",
"297": "color",
"298": "road",
"299": "dell",
"300": "headband",
"301": "jacket",
"302": "canada",
"303": "john",
"304": "baked",
"305": "rectangle",
"306": "hamburger",
"307": "lake",
"308": "pacific",
"309": "bananas",
"310": "trick",
"311": "7:35",
"312": "green",
"313": "10:10",
"314": "low",
"315": "white and black",
"316": "hydrant",
"317": "table",
"318": "tulips",
"319": "tree branch",
"320": "lamp",
"321": "stop",
"322": "overcast",
"323": "jeans",
"324": "new york",
"325": "years",
"326": "corona",
"327": "chicken",
"328": "wii controller",
"329": "usa",
"330": "8:35",
"331": "arabic",
"332": "oval",
"333": "60",
"334": "rack",
"335": "backhand",
"336": "suit",
"337": "on track",
"338": "good",
"339": "ollie",
"340": "frisbees",
"341": "dog",
"342": "wall",
"343": "security",
"344": "38",
"345": "sweatband",
"346": "21",
"347": "lines",
"348": "42",
"349": "tv",
"350": "shelter",
"351": "fresh",
"352": "cleaning",
"353": "england",
"354": "skate",
"355": "italy",
"356": "dresser",
"357": "cheesecake",
"358": "gaming",
"359": "avocado",
"360": "sofa",
"361": "elephants",
"362": "breakfast",
"363": "purple",
"364": "screen",
"365": "motor",
"366": "hotel room",
"367": "american",
"368": "corner",
"369": "22",
"370": "4",
"371": "playing",
"372": "california",
"373": "person",
"374": "eating",
"375": "peeing",
"376": "squash",
"377": "riding motorcycle",
"378": "bored",
"379": "town",
"380": "right",
"381": "paper",
"382": "lying down",
"383": "sitting",
"384": "19",
"385": "not here",
"386": "lifeguard",
"387": "15",
"388": "labrador",
"389": "car",
"390": "bikes",
"391": "on dresser",
"392": "street",
"393": "germany",
"394": "writing",
"395": "catching",
"396": "big",
"397": "20",
"398": "head",
"399": "tent",
"400": "girl",
"401": "boxes",
"402": "lanyard",
"403": "cook",
"404": "subway",
"405": "little girl",
"406": "18",
"407": "ivy",
"408": "triangles",
"409": "stop sign",
"410": "britain",
"411": "canopy",
"412": "metal",
"413": "shrimp",
"414": "crane",
"415": "bed",
"416": "spanish",
"417": "2012",
"418": "closed",
"419": "regular",
"420": "1:10",
"421": "shorts",
"422": "2 years",
"423": "love",
"424": "blonde",
"425": "tan",
"426": "bob",
"427": "white and green",
"428": "yellow and green",
"429": "1",
"430": "red",
"431": "fashion",
"432": "center",
"433": "truck",
"434": "can't tell",
"435": "lady",
"436": "watching",
"437": "they aren't",
"438": "shadows",
"439": "on road",
"440": "commercial",
"441": "3",
"442": "clothes",
"443": "logo",
"444": "pot",
"445": "pasture",
"446": "giraffe",
"447": "rubber",
"448": "us",
"449": "300",
"450": "toilet",
"451": "leaves",
"452": "train",
"453": "playing wii",
"454": "reading",
"455": "controller",
"456": "antique",
"457": "33",
"458": "cats",
"459": "on counter",
"460": "comfort",
"461": "under",
"462": "ham",
"463": "jp morgan",
"464": "turkey",
"465": "fruits",
"466": "husky",
"467": "purse",
"468": "hat",
"469": "red and blue",
"470": "talking",
"471": "dishes",
"472": "in car",
"473": "over",
"474": "taking off",
"475": "suv",
"476": "hotel",
"477": "nothing",
"478": "brown",
"479": "colored",
"480": "lettuce",
"481": "spots",
"482": "cooking",
"483": "asparagus",
"484": "23",
"485": "construction",
"486": "onions",
"487": "skyscraper",
"488": "owner",
"489": "living",
"490": "fried",
"491": "ducati",
"492": "strawberry",
"493": "clear",
"494": "behind",
"495": "salmon",
"496": "animal",
"497": "high",
"498": "scrambled",
"499": "male",
"500": "100",
"501": "huge",
"502": "pm",
"503": "night time",
"504": "straight",
"505": "ascending",
"506": "ostrich",
"507": "los angeles",
"508": "sunny",
"509": "drinking water",
"510": "cumulus",
"511": "skiing",
"512": "lights",
"513": "close",
"514": "fall",
"515": "smiling",
"516": "night",
"517": "on tower",
"518": "gas",
"519": "tour",
"520": "old",
"521": "corn",
"522": "daisies",
"523": "whipped cream",
"524": "ski",
"525": "plates",
"526": "bicycle",
"527": "basil",
"528": "computer",
"529": "red and silver",
"530": "multi",
"531": "mountain",
"532": "catcher",
"533": "skier",
"534": "unsure",
"535": "giraffes",
"536": "couch",
"537": "camping",
"538": "cardinals",
"539": "to left",
"540": "shower",
"541": "behind fence",
"542": "utensils",
"543": "7",
"544": "railing",
"545": "rocks",
"546": "real",
"547": "several",
"548": "ramp",
"549": "parking lot",
"550": "cactus",
"551": "chinese",
"552": "nowhere",
"553": "food",
"554": "doughnut",
"555": "bag",
"556": "tiles",
"557": "55",
"558": "knife",
"559": "messy",
"560": "apples",
"561": "shadow",
"562": "spotted",
"563": "upside down",
"564": "hitting ball",
"565": "skateboarder",
"566": "quilt",
"567": "ears",
"568": "beach",
"569": "on grass",
"570": "20 ft",
"571": "holding",
"572": "snowboarder",
"573": "bike",
"574": "bull",
"575": "motorbike",
"576": "heat",
"577": "backpack",
"578": "navy",
"579": "gold",
"580": "slow",
"581": "green and white",
"582": "partly cloudy",
"583": "skate park",
"584": "bench",
"585": "fruit",
"586": "america",
"587": "on bus",
"588": "trailer",
"589": "painting",
"590": "7:45",
"591": "playing baseball",
"592": "track",
"593": "opaque",
"594": "spinach",
"595": "socks",
"596": "baby",
"597": "swinging",
"598": "paint",
"599": "fast",
"600": "straw",
"601": "chain",
"602": "in background",
"603": "berries",
"604": "cement",
"605": "lots",
"606": "not likely",
"607": "beef",
"608": "ketchup",
"609": "air",
"610": "advertisement",
"611": "hundreds",
"612": "day",
"613": "in",
"614": "luggage",
"615": "flowers",
"616": "both",
"617": "serious",
"618": "2013",
"619": "freight",
"620": "stand",
"621": "hill",
"622": "diamonds",
"623": "suitcase",
"624": "counter",
"625": "unclear",
"626": "kites",
"627": "jackets",
"628": "11",
"629": "marble",
"630": "players",
"631": "donut",
"632": "neon",
"633": "windowsill",
"634": "race",
"635": "teal",
"636": "cannot tell",
"637": "carnations",
"638": "kitchen",
"639": "grass",
"640": "london",
"641": "fish",
"642": "i don't know",
"643": "trees",
"644": "crosswalk",
"645": "blue and white",
"646": "child",
"647": "not long",
"648": "17",
"649": "salad",
"650": "snowboarding",
"651": "2 feet",
"652": "gun",
"653": "sun",
"654": "cows",
"655": "75",
"656": "by window",
"657": "skis",
"658": "catch",
"659": "in bowl",
"660": "fell",
"661": "people",
"662": "9",
"663": "flip",
"664": "rock",
"665": "black and silver",
"666": "monitor",
"667": "cat",
"668": "playing game",
"669": "talking on phone",
"670": "sleep",
"671": "elephant",
"672": "plastic",
"673": "animals",
"674": "rv",
"675": "happiness",
"676": "nighttime",
"677": "hiking",
"678": "water",
"679": "chips",
"680": "bear",
"681": "walking",
"682": "2",
"683": "bus",
"684": "solid",
"685": "tree",
"686": "throw",
"687": "newspaper",
"688": "yellow and orange",
"689": "crown",
"690": "wine bottle",
"691": "shirts",
"692": "many",
"693": "yellow and red",
"694": "handle",
"695": "orchid",
"696": "accident",
"697": "wedding",
"698": "not very",
"699": "3 feet",
"700": "furniture",
"701": "foot",
"702": "brushing",
"703": "48",
"704": "50",
"705": "bottom",
"706": "daytime",
"707": "conductor",
"708": "shoes",
"709": "wine tasting",
"710": "left side",
"711": "west",
"712": "board",
"713": "small",
"714": "marker",
"715": "army",
"716": "sunlight",
"717": "mushroom",
"718": "horse",
"719": "spring",
"720": "brick",
"721": "baseball",
"722": "rough",
"723": "wood",
"724": "passengers",
"725": "tiled",
"726": "first base",
"727": "children",
"728": "chopsticks",
"729": "washing",
"730": "1 year",
"731": "normal",
"732": "protection",
"733": "fire hydrant",
"734": "grapes",
"735": "morning",
"736": "round",
"737": "over easy",
"738": "facebook",
"739": "bush",
"740": "teddy bear",
"741": "toyota",
"742": "maroon",
"743": "white and orange",
"744": "wires",
"745": "inside",
"746": "picture",
"747": "light",
"748": "play",
"749": "name tag",
"750": "straight ahead",
"751": "37",
"752": "railroad crossing",
"753": "tail",
"754": "ice cream",
"755": "wiimote",
"756": "graduation",
"757": "boys",
"758": "blurry",
"759": "abstract",
"760": "poor",
"761": "diamond",
"762": "deer",
"763": "cars",
"764": "gray and white",
"765": "lot",
"766": "human",
"767": "in street",
"768": "54",
"769": "red and black",
"770": "roll",
"771": "cupcake",
"772": "dark",
"773": "parking",
"774": "pan",
"775": "meat",
"776": "8",
"777": "dodgers",
"778": "ear",
"779": "1:50",
"780": "pink",
"781": "cranes",
"782": "carrots",
"783": "black",
"784": "pond",
"785": "park",
"786": "9:35",
"787": "field",
"788": "pastries",
"789": "glass",
"790": "television",
"791": "100 feet",
"792": "descending",
"793": "brushing her teeth",
"794": "wii remote",
"795": "banana",
"796": "bun",
"797": "towel",
"798": "north america",
"799": "looking at camera",
"800": "in water",
"801": "garlic",
"802": "up",
"803": "sub",
"804": "power lines",
"805": "back",
"806": "red and gray",
"807": "grazing",
"808": "2000",
"809": "bathroom",
"810": "mountains",
"811": "in sky",
"812": "noon",
"813": "dc",
"814": "oil",
"815": "cushion",
"816": "sunset",
"817": "open",
"818": "o",
"819": "pillow",
"820": "asian",
"821": "house",
"822": "nobody",
"823": "yellow and white",
"824": "laying down",
"825": "cargo",
"826": "building",
"827": "frosted",
"828": "40",
"829": "sink",
"830": "6",
"831": "2:00",
"832": "blt",
"833": "black and red",
"834": "frog",
"835": "hardwood",
"836": "tennis",
"837": "lilies",
"838": "on street",
"839": "plant",
"840": "28",
"841": "dock",
"842": "mac",
"843": "lg",
"844": "wild",
"845": "celery",
"846": "omelet",
"847": "woman",
"848": "harley",
"849": "net",
"850": "cutting board",
"851": "adult",
"852": "exit",
"853": "very",
"854": "porcelain",
"855": "savannah",
"856": "on skateboard",
"857": "black and yellow",
"858": "first",
"859": "british",
"860": "italian",
"861": "no man",
"862": "orange",
"863": "tired",
"864": "curtain",
"865": "at table",
"866": "0",
"867": "peanuts",
"868": "river",
"869": "square",
"870": "camera",
"871": "evening",
"872": "fun",
"873": "lays",
"874": "photographer",
"875": "dress",
"876": "medium",
"877": "left",
"878": "tile",
"879": "bike rack",
"880": "silver and red",
"881": "billabong",
"882": "cup",
"883": "east",
"884": "frisbee",
"885": "english",
"886": "pizza",
"887": "sandwich",
"888": "broken",
"889": "helmet",
"890": "gray and black",
"891": "hugging",
"892": "80",
"893": "queen",
"894": "suitcases",
"895": "fork",
"896": "butterfly",
"897": "bicycles",
"898": "art",
"899": "black white",
"900": "me",
"901": "am",
"902": "bat",
"903": "forest",
"904": "street name",
"905": "29",
"906": "church",
"907": "city",
"908": "forward",
"909": "mushrooms",
"910": "bird",
"911": "ball",
"912": "female",
"913": "steel",
"914": "concrete",
"915": "right hand",
"916": "70",
"917": "throwing",
"918": "multiple",
"919": "dots",
"920": "umpire",
"921": "jet",
"922": "on",
"923": "1:55",
"924": "pots",
"925": "steak",
"926": "buildings",
"927": "surfer",
"928": "on wall",
"929": "ahead",
"930": "circle",
"931": "wii remotes",
"932": "day time",
"933": "31",
"934": "volkswagen",
"935": "rain",
"936": "remote",
"937": "200",
"938": "dinner",
"939": "tuna",
"940": "raining",
"941": "onion",
"942": "storm",
"943": "floral",
"944": "cake",
"945": "pie",
"946": "island",
"947": "red and yellow",
"948": "pans",
"949": "milk",
"950": "mustard",
"951": "toilet paper",
"952": "apple",
"953": "hay",
"954": "short",
"955": "yamaha",
"956": "palm",
"957": "p",
"958": "display",
"959": "shade",
"960": "coat",
"961": "chair",
"962": "25",
"963": "f",
"964": "down",
"965": "30 mph",
"966": "pole",
"967": "hot dog",
"968": "baseball bat",
"969": "numbers",
"970": "airport",
"971": "rose",
"972": "wind",
"973": "beige",
"974": "pictures",
"975": "style",
"976": "on his head",
"977": "flying kite",
"978": "riding bike",
"979": "soup",
"980": "bowl",
"981": "motorcycle",
"982": "summer",
"983": "seagull",
"984": "floor",
"985": "30",
"986": "india",
"987": "warm",
"988": "on water",
"989": "roof",
"990": "multicolored",
"991": "zoo",
"992": "cross",
"993": "south",
"994": "formica",
"995": "rail",
"996": "snow",
"997": "forehand",
"998": "outdoors",
"999": "analog",
"1000": "microwave",
"1001": "daisy",
"1002": "stew",
"1003": "intersection",
"1004": "pelicans",
"1005": "right side",
"1006": "72",
"1007": "coffee",
"1008": "nike",
"1009": "playing video game",
"1010": "mexico",
"1011": "engine",
"1012": "airplanes",
"1013": "black and brown",
"1014": "brushing teeth",
"1015": "tabby",
"1016": "white and brown",
"1017": "rope",
"1018": "striped",
"1019": "full",
"1020": "no",
"1021": "above"
},
"image_size": 384,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 866,
"1": 429,
"1 year": 730,
"10": 137,
"100": 500,
"100 feet": 791,
"10:10": 313,
"11": 628,
"11:00": 11,
"11:05": 175,
"11:10": 274,
"11:15": 144,
"11:20": 278,
"12": 158,
"13": 135,
"14": 196,
"15": 387,
"150": 279,
"16": 33,
"17": 648,
"18": 406,
"19": 384,
"1:10": 420,
"1:50": 779,
"1:55": 923,
"2": 682,
"2 feet": 651,
"2 years": 422,
"20": 397,
"20 ft": 570,
"200": 937,
"2000": 808,
"2010": 247,
"2012": 417,
"2013": 618,
"21": 346,
"22": 369,
"23": 484,
"24": 291,
"25": 962,
"26": 132,
"28": 840,
"29": 905,
"2:00": 831,
"3": 441,
"3 feet": 699,
"30": 985,
"30 mph": 965,
"300": 449,
"31": 933,
"33": 457,
"37": 751,
"38": 344,
"4": 370,
"40": 828,
"42": 348,
"48": 703,
"5": 212,
"50": 704,
"54": 768,
"55": 557,
"6": 830,
"60": 333,
"7": 543,
"70": 916,
"72": 1006,
"75": 655,
"7:35": 311,
"7:45": 590,
"8": 776,
"80": 892,
"8:35": 330,
"9": 662,
"9:35": 786,
"above": 1021,
"abstract": 759,
"accident": 696,
"adult": 851,
"advertisement": 610,
"africa": 203,
"afternoon": 38,
"ahead": 929,
"air": 609,
"airplane": 10,
"airplanes": 1012,
"airport": 970,
"all": 199,
"am": 901,
"america": 586,
"american": 367,
"ana": 224,
"analog": 999,
"angry": 78,
"animal": 496,
"animals": 673,
"antique": 456,
"apple": 952,
"apples": 560,
"arabic": 331,
"army": 715,
"arrow": 250,
"art": 898,
"ascending": 505,
"asia": 176,
"asian": 820,
"asparagus": 483,
"asphalt": 180,
"at table": 865,
"australia": 6,
"avocado": 359,
"away": 50,
"baby": 596,
"back": 805,
"background": 243,
"backhand": 335,
"backpack": 577,
"bag": 555,
"baked": 304,
"ball": 911,
"banana": 795,
"bananas": 309,
"barn": 71,
"baseball": 721,
"baseball bat": 968,
"baseball cap": 34,
"baseball glove": 124,
"basil": 527,
"bat": 902,
"bathroom": 809,
"batter": 210,
"batting": 22,
"beach": 568,
"beagle": 29,
"bear": 680,
"bed": 415,
"bedroom": 109,
"beef": 607,
"behind": 494,
"behind fence": 541,
"beige": 973,
"bench": 584,
"berries": 603,
"bicycle": 526,
"bicycles": 897,
"big": 396,
"big ben": 202,
"bike": 573,
"bike rack": 879,
"bikes": 390,
"billabong": 881,
"bird": 910,
"birds": 264,
"birthday": 20,
"black": 783,
"black and brown": 1013,
"black and red": 833,
"black and silver": 665,
"black and white": 154,
"black and yellow": 857,
"black white": 899,
"blanket": 289,
"blonde": 424,
"blt": 832,
"blue": 112,
"blue and white": 645,
"blue and yellow": 222,
"blueberries": 205,
"blurry": 758,
"bmw": 110,
"board": 712,
"boat": 234,
"bob": 426,
"books": 59,
"bookshelf": 23,
"bored": 378,
"boredom": 213,
"both": 616,
"bottom": 705,
"bowl": 980,
"boxes": 401,
"boy": 45,
"boys": 757,
"branch": 55,
"bread": 281,
"breakfast": 362,
"brick": 720,
"bricks": 246,
"britain": 410,
"british": 859,
"broccoli": 117,
"broken": 888,
"brown": 478,
"brown and white": 95,
"brushing": 702,
"brushing her teeth": 793,
"brushing teeth": 1014,
"buffalo": 248,
"building": 826,
"buildings": 926,
"bull": 574,
"bun": 796,
"bunk": 60,
"burger": 218,
"bus": 683,
"bus stop": 211,
"bush": 739,
"butterfly": 896,
"button up": 258,
"by window": 656,
"cactus": 550,
"cage": 9,
"cake": 944,
"calico": 139,
"california": 372,
"camera": 870,
"camping": 537,
"can't see": 263,
"can't tell": 434,
"canada": 302,
"cannot tell": 636,
"canopy": 411,
"cap": 105,
"car": 389,
"cardinals": 538,
"cargo": 825,
"carnations": 637,
"carrots": 782,
"cars": 763,
"cart": 170,
"casserole": 269,
"cat": 667,
"catch": 658,
"catcher": 532,
"catching": 395,
"cats": 458,
"ceiling": 90,
"celery": 845,
"cell phone": 239,
"cement": 604,
"center": 432,
"ceramic": 169,
"chain": 601,
"chains": 93,
"chair": 961,
"checkered": 296,
"cheese": 134,
"cheesecake": 357,
"chicago": 294,
"chicken": 327,
"child": 646,
"children": 727,
"chinese": 551,
"chips": 679,
"chopsticks": 728,
"church": 906,
"circle": 930,
"city": 907,
"clay": 236,
"cleaning": 352,
"clear": 493,
"clock": 116,
"clock tower": 187,
"close": 513,
"closed": 418,
"clothes": 442,
"cloudy": 24,
"coat": 960,
"coffee": 1007,
"collar": 190,
"color": 297,
"colored": 479,
"comfort": 460,
"commercial": 440,
"computer": 528,
"concrete": 914,
"conductor": 707,
"construction": 485,
"controller": 455,
"cook": 403,
"cooking": 482,
"corn": 521,
"corner": 368,
"corona": 326,
"cotton": 167,
"couch": 536,
"counter": 624,
"cow": 114,
"cows": 654,
"crane": 414,
"cranes": 781,
"cream": 103,
"cross": 992,
"crossing": 84,
"crosswalk": 644,
"crown": 689,
"cubs": 194,
"cumulus": 510,
"cup": 882,
"cupcake": 771,
"curtain": 864,
"curtains": 229,
"cushion": 815,
"cutting board": 850,
"daisies": 522,
"daisy": 1001,
"dancing": 128,
"dark": 772,
"day": 612,
"day time": 932,
"daytime": 706,
"dc": 813,
"decoration": 82,
"deer": 762,
"dell": 299,
"descending": 792,
"desert": 172,
"design": 73,
"dessert": 123,
"diamond": 761,
"diamonds": 622,
"dinner": 938,
"dirt": 181,
"dirty": 127,
"dishes": 471,
"display": 958,
"dock": 841,
"dodgers": 777,
"dog": 341,
"dog bed": 41,
"don't know": 245,
"donut": 631,
"donuts": 182,
"door": 1,
"dots": 919,
"double": 225,
"double decker": 99,
"doughnut": 554,
"down": 964,
"dr pepper": 7,
"dress": 875,
"dresser": 356,
"drinking": 178,
"drinking water": 509,
"drinks": 262,
"driving": 220,
"ducati": 491,
"duck": 283,
"ducks": 287,
"dusk": 58,
"ear": 778,
"earring": 266,
"ears": 567,
"east": 883,
"eating": 374,
"electric": 143,
"elephant": 671,
"elephants": 361,
"email": 147,
"engine": 1011,
"england": 353,
"english": 885,
"evening": 871,
"exit": 852,
"f": 963,
"facebook": 738,
"fair": 87,
"fall": 514,
"falling": 152,
"fashion": 431,
"fast": 599,
"feathers": 255,
"fell": 660,
"female": 912,
"fence": 233,
"fern": 4,
"few": 177,
"field": 787,
"fire hydrant": 733,
"first": 858,
"first base": 726,
"fish": 641,
"fishing": 288,
"flag": 260,
"flat": 13,
"flip": 663,
"floor": 984,
"floral": 943,
"flower": 249,
"flowers": 615,
"flying kite": 977,
"food": 553,
"foot": 701,
"football": 97,
"for balance": 174,
"forehand": 997,
"forest": 903,
"fork": 895,
"formica": 994,
"forward": 908,
"frame": 193,
"freight": 619,
"french": 130,
"fresh": 351,
"fried": 490,
"frisbee": 884,
"frisbees": 340,
"frog": 834,
"front": 31,
"frosted": 827,
"frosting": 145,
"fruit": 585,
"fruits": 465,
"full": 1019,
"fun": 872,
"furniture": 700,
"game": 12,
"gaming": 358,
"garlic": 801,
"gas": 518,
"german": 201,
"germany": 393,
"giraffe": 446,
"giraffes": 535,
"girl": 400,
"glass": 789,
"glasses": 241,
"glove": 285,
"go": 150,
"goggles": 156,
"gold": 579,
"gone": 19,
"good": 338,
"graduation": 756,
"graffiti": 270,
"grapes": 734,
"grass": 639,
"gray": 179,
"gray and black": 890,
"gray and white": 764,
"grazing": 807,
"green": 312,
"green and white": 581,
"green and yellow": 171,
"ground": 106,
"gun": 652,
"hair": 79,
"ham": 462,
"hamburger": 306,
"hand": 65,
"handle": 694,
"happiness": 675,
"happy": 253,
"hard": 16,
"hardwood": 835,
"harley": 848,
"hat": 468,
"hawaii": 188,
"hay": 953,
"head": 398,
"headband": 300,
"heat": 576,
"helmet": 889,
"high": 497,
"hiking": 677,
"hill": 621,
"hitting ball": 564,
"holding": 571,
"honda": 235,
"horse": 718,
"hot dog": 967,
"hotel": 476,
"hotel room": 366,
"house": 821,
"hp": 244,
"huge": 501,
"hugging": 891,
"human": 766,
"humans": 166,
"hundreds": 611,
"husky": 466,
"hydrant": 316,
"i don't know": 642,
"ice": 26,
"ice cream": 754,
"in": 613,
"in air": 101,
"in background": 602,
"in bowl": 659,
"in car": 472,
"in corner": 232,
"in field": 86,
"in sky": 811,
"in street": 767,
"in water": 800,
"india": 986,
"inside": 745,
"intersection": 1003,
"island": 946,
"italian": 860,
"italy": 355,
"ivy": 407,
"jacket": 301,
"jackets": 627,
"japan": 184,
"jeans": 323,
"jeep": 228,
"jet": 921,
"jockey": 251,
"john": 303,
"jp morgan": 463,
"jump": 125,
"jumping": 72,
"ketchup": 608,
"khaki": 161,
"king": 92,
"kitchen": 638,
"kite": 102,
"kites": 626,
"knife": 558,
"labrador": 388,
"lady": 435,
"lake": 307,
"lamp": 320,
"landing": 189,
"lanyard": 402,
"laptop": 77,
"large": 159,
"laying down": 824,
"lays": 873,
"leather": 153,
"leaves": 451,
"left": 877,
"left side": 710,
"lettuce": 480,
"lg": 843,
"lifeguard": 386,
"light": 747,
"lighting": 80,
"lights": 512,
"lilies": 837,
"lines": 347,
"linoleum": 208,
"listening": 35,
"little": 107,
"little girl": 405,
"living": 489,
"living room": 63,
"logo": 443,
"london": 640,
"long": 131,
"looking": 89,
"looking at camera": 799,
"looking out window": 275,
"los angeles": 507,
"lot": 765,
"lots": 605,
"love": 423,
"low": 314,
"luggage": 614,
"lunch": 295,
"lying down": 382,
"mac": 842,
"male": 499,
"man": 256,
"many": 692,
"marble": 629,
"marker": 714,
"market": 28,
"maroon": 742,
"me": 900,
"meat": 775,
"medium": 876,
"messy": 559,
"metal": 412,
"mexican": 52,
"mexico": 1010,
"microsoft": 271,
"microwave": 1000,
"milk": 949,
"mirror": 70,
"model": 186,
"monitor": 666,
"morning": 735,
"motor": 365,
"motorbike": 575,
"motorcycle": 981,
"motorcycles": 113,
"mountain": 531,
"mountains": 810,
"mouth": 111,
"mud": 216,
"multi": 530,
"multicolored": 990,
"multiple": 918,
"mushroom": 717,
"mushrooms": 909,
"mustard": 950,
"name tag": 749,
"napkin": 272,
"natural": 293,
"navy": 578,
"necklace": 47,
"neither": 64,
"neon": 632,
"net": 849,
"new york": 324,
"newspaper": 687,
"night": 516,
"night time": 503,
"nighttime": 676,
"nike": 1008,
"no": 1020,
"no dog": 284,
"no man": 861,
"no parking": 198,
"no sign": 267,
"no train": 259,
"nobody": 822,
"noon": 812,
"normal": 731,
"north": 160,
"north america": 798,
"not here": 385,
"not in service": 43,
"not likely": 606,
"not long": 647,
"not possible": 142,
"not sure": 88,
"not there": 8,
"not very": 698,
"nothing": 477,
"nowhere": 552,
"numbers": 969,
"o": 818,
"ocean": 85,
"off": 276,
"oil": 814,
"old": 520,
"ollie": 339,
"omelet": 846,
"on": 922,
"on bench": 75,
"on building": 17,
"on bus": 587,
"on counter": 459,
"on dresser": 391,
"on floor": 54,
"on grass": 569,
"on his head": 976,
"on left": 5,
"on phone": 286,
"on road": 439,
"on skateboard": 856,
"on stove": 51,
"on street": 838,
"on tower": 517,
"on track": 337,
"on wall": 928,
"on water": 988,
"onion": 941,
"onions": 486,
"opaque": 593,
"open": 817,
"orange": 862,
"orange and white": 67,
"orange and yellow": 252,
"orchid": 695,
"oriental": 195,
"ostrich": 506,
"out": 100,
"outdoors": 998,
"outside": 120,
"oval": 332,
"oven": 242,
"over": 473,
"over easy": 737,
"overcast": 322,
"owner": 488,
"p": 957,
"pacific": 308,
"paint": 598,
"painting": 589,
"palm": 956,
"pan": 774,
"pans": 948,
"paper": 381,
"paris": 68,
"park": 785,
"parked": 254,
"parking": 773,
"parking lot": 549,
"partly cloudy": 582,
"passengers": 724,
"pasta": 30,
"pastries": 788,
"pasture": 445,
"peanuts": 867,
"peeing": 375,
"pelican": 81,
"pelicans": 1004,
"people": 661,
"person": 373,
"phone": 61,
"photographer": 874,
"picnic table": 74,
"picture": 746,
"pictures": 974,
"pie": 945,
"pillow": 819,
"pink": 780,
"pink and white": 164,
"pizza": 886,
"plaid": 207,
"plain": 209,
"plane": 83,
"plant": 839,
"plastic": 672,
"plate": 138,
"plates": 525,
"platform": 39,
"play": 748,
"player": 151,
"players": 630,
"playing": 371,
"playing baseball": 591,
"playing game": 668,
"playing video game": 1009,
"playing video games": 98,
"playing wii": 453,
"pm": 502,
"pointing": 96,
"pole": 966,
"polo": 44,
"pond": 784,
"poor": 760,
"porcelain": 854,
"pork": 48,
"pot": 444,
"pots": 924,
"power lines": 804,
"protection": 732,
"purple": 363,
"purse": 467,
"queen": 893,
"quilt": 566,
"race": 634,
"rack": 334,
"rail": 995,
"railing": 544,
"railroad crossing": 752,
"rain": 935,
"raining": 940,
"ramp": 548,
"reading": 454,
"real": 546,
"rectangle": 305,
"red": 430,
"red and black": 769,
"red and blue": 469,
"red and gray": 806,
"red and green": 94,
"red and silver": 529,
"red and white": 3,
"red and yellow": 947,
"regular": 419,
"relaxing": 163,
"relish": 191,
"remote": 936,
"residential": 49,
"resting": 290,
"riding": 146,
"riding bike": 978,
"riding motorcycle": 377,
"right": 380,
"right hand": 915,
"right side": 1005,
"river": 868,
"road": 298,
"rock": 664,
"rocks": 545,
"roll": 770,
"roof": 989,
"rope": 1017,
"rose": 971,
"roses": 168,
"rough": 722,
"round": 736,
"rubber": 447,
"rv": 674,
"sad": 118,
"safari": 133,
"sail": 183,
"salad": 649,
"salmon": 495,
"sandwich": 887,
"savannah": 855,
"scarf": 104,
"scrambled": 498,
"screen": 364,
"seagull": 983,
"security": 343,
"sedan": 69,
"serious": 617,
"serve": 261,
"several": 547,
"shade": 959,
"shadow": 561,
"shadows": 438,
"shelter": 350,
"shirt": 91,
"shirts": 691,
"shoes": 708,
"shopping": 204,
"short": 954,
"shorts": 421,
"shower": 540,
"shrimp": 413,
"sidewalk": 140,
"sign": 21,
"silver": 237,
"silver and black": 126,
"silver and red": 880,
"single": 231,
"sink": 829,
"sitting": 383,
"skate": 354,
"skate park": 583,
"skateboard": 53,
"skateboarder": 565,
"skateboarding": 292,
"ski": 524,
"skier": 533,
"skiing": 511,
"skis": 657,
"sky": 226,
"skyscraper": 487,
"sleep": 670,
"sleeping": 0,
"slow": 580,
"small": 713,
"smile": 157,
"smiling": 515,
"snake": 14,
"snow": 996,
"snowboard": 268,
"snowboarder": 572,
"snowboarding": 650,
"snowy": 25,
"soccer": 18,
"soccer ball": 141,
"socks": 595,
"sofa": 360,
"solid": 684,
"soup": 979,
"south": 993,
"spanish": 416,
"spectators": 230,
"spinach": 594,
"spoon": 165,
"spots": 481,
"spotted": 562,
"spring": 719,
"square": 869,
"squares": 214,
"squash": 376,
"stand": 620,
"standing": 121,
"station": 206,
"steak": 925,
"steel": 913,
"stew": 1002,
"stop": 321,
"stop sign": 409,
"storm": 942,
"straight": 504,
"straight ahead": 750,
"straw": 600,
"strawberries": 148,
"strawberry": 492,
"street": 392,
"street name": 904,
"string": 223,
"striped": 1018,
"stripes": 217,
"student": 219,
"style": 975,
"sub": 803,
"subway": 404,
"suit": 336,
"suitcase": 623,
"suitcases": 894,
"summer": 982,
"sun": 653,
"sunlight": 716,
"sunny": 508,
"sunset": 816,
"surfer": 927,
"suv": 475,
"sweatband": 345,
"swinging": 597,
"t shirt": 46,
"tabby": 1015,
"table": 317,
"tail": 753,
"taking off": 474,
"talking": 470,
"talking on phone": 669,
"tan": 425,
"teal": 635,
"teddy bear": 740,
"television": 790,
"tennis": 836,
"tennis ball": 108,
"tent": 399,
"they aren't": 437,
"throw": 686,
"throwing": 917,
"tie": 185,
"tile": 878,
"tiled": 725,
"tiles": 556,
"tired": 863,
"to left": 539,
"to right": 265,
"toilet": 450,
"toilet paper": 951,
"toothpick": 221,
"toothpicks": 215,
"tour": 519,
"toward": 42,
"towel": 797,
"tower": 149,
"town": 379,
"toyota": 741,
"track": 592,
"tracks": 238,
"trailer": 588,
"train": 452,
"train tracks": 76,
"transportation": 122,
"tree": 685,
"tree branch": 319,
"trees": 643,
"triangles": 408,
"trick": 310,
"truck": 433,
"tulips": 318,
"tuna": 939,
"turkey": 464,
"tv": 349,
"twin": 257,
"umbrellas": 66,
"umpire": 920,
"unclear": 625,
"under": 461,
"united states": 173,
"unknown": 136,
"unsure": 534,
"up": 802,
"upside down": 563,
"urban": 57,
"us": 448,
"usa": 329,
"utensils": 542,
"vegetables": 277,
"very": 853,
"video game": 129,
"visor": 155,
"volkswagen": 934,
"waiting": 200,
"walking": 681,
"wall": 342,
"wallet": 197,
"warm": 987,
"washing": 729,
"watching": 436,
"water": 678,
"wedding": 697,
"weeds": 240,
"west": 711,
"whipped cream": 523,
"white": 32,
"white and black": 315,
"white and blue": 119,
"white and brown": 1016,
"white and green": 427,
"white and orange": 743,
"white and red": 27,
"wii": 40,
"wii controller": 328,
"wii remote": 794,
"wii remotes": 931,
"wiimote": 755,
"wild": 844,
"wind": 972,
"window": 280,
"window sill": 273,
"windows": 15,
"windowsill": 633,
"wine": 162,
"wine bottle": 690,
"wine tasting": 709,
"winter": 192,
"wires": 744,
"woman": 847,
"women": 227,
"wood": 723,
"woods": 36,
"working": 282,
"writing": 394,
"yamaha": 955,
"years": 325,
"yellow": 2,
"yellow and blue": 62,
"yellow and green": 428,
"yellow and orange": 688,
"yellow and red": 693,
"yellow and white": 823,
"yes": 56,
"young": 37,
"zebra": 115,
"zoo": 991
},
"layer_norm_eps": 1e-12,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"qkv_bias": true,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.33.1",
"type_vocab_size": 2,
"vocab_size": 30522
}