diff --git "a/data_text_recog_glyph/COCO_Text/data-info.json" "b/data_text_recog_glyph/COCO_Text/data-info.json" new file mode 100644--- /dev/null +++ "b/data_text_recog_glyph/COCO_Text/data-info.json" @@ -0,0 +1 @@ +{"data_root": "/data/vdb/yuxiang.tyx/AIGC/data/ocr_data/COCO_Text/images", "data_list": [{"img_name": "COCO_train2014_000000524311.jpg", "caption": "a man holding a remote control in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524314.jpg", "caption": "a man sitting on the ground with a stuffed bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393241.jpg", "caption": "two people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393251.jpg", "caption": "a man and a boy cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262184.jpg", "caption": "a forklift truck parked next to a large crane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131118.jpg", "caption": "a small plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524338.jpg", "caption": "a man standing on a sidewalk reading a newspaper", "annotations": [{"polygon": [[296, 298], [295, 383], [334, 379], [335, 378], [336, 345], [334, 298]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131126.jpg", "caption": "a street sign for brooklyn and atloh", "annotations": [{"polygon": [[183, 226], [180, 197], [279, 163], [281, 205]], "text": "Athol", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Athol", "recog_valid": true, "glyph_recog_text": "Athol", "glyph_recog_ld": 1.0}, {"polygon": [[143, 227], [331, 265], [330, 308], [140, 281]], "text": "Brooklyn", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Brooklyn", "recog_valid": true, "glyph_recog_text": "Brooklyn", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262221.jpg", "caption": "a man standing on a tennis court", "annotations": [{"polygon": [[358, 349], [358, 349], [390, 337], [390, 337], [395, 368], [395, 368], [363, 382]], "text": "w", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[280, 320], [324, 341], [324, 341], [315, 388], [315, 388], [276, 365]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": true, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131174.jpg", "caption": "a large jet airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000109.jpg", "caption": "a view of the river from a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131190.jpg", "caption": "a large red building with a clock tower on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524420.jpg", "caption": "a woman walking her dog on a city sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000138.jpg", "caption": "a kitchen with a stove, sink, refrigerator and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000142.jpg", "caption": "a plate with a sandwich and a jar of peanut butter", "annotations": [{"polygon": [[139, 26], [146, 111], [270, 80], [266, 13]], "text": "Silk", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Silk", "recog_valid": true, "glyph_recog_text": "Silk", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000151.jpg", "caption": "a man looking out the window of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131225.jpg", "caption": "a blue and yellow bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262299.jpg", "caption": "a person riding a dirt bike in the air", "annotations": [{"polygon": [[165, 218], [165, 218], [177, 243], [195, 235], [180, 211]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262329.jpg", "caption": "a clock on the side of a building with pink flowers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393412.jpg", "caption": "a small orange van parked in a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131299.jpg", "caption": "a man standing next to his car with skis and a backpack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000247.jpg", "caption": "a small airplane with a propeller on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000250.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[78, 293], [78, 302], [83, 310], [194, 351], [194, 327], [96, 289], [87, 285], [82, 286]], "text": "GREENWICH ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GREENWICH", "recog_valid": false, "glyph_recog_text": "GREENWICH ST", "glyph_recog_ld": 0.7500002083331597}, {"polygon": [[312, 338], [318, 356], [376, 317], [376, 306], [380, 290]], "text": "VESEYs", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VESEY", "recog_valid": false, "glyph_recog_text": "VESEYs", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262399.jpg", "caption": "a woman eating a piece of fish at a fish market", "annotations": [{"polygon": [[461, 105], [509, 112], [507, 135], [460, 128]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "HAPY", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131330.jpg", "caption": "seedlings in pots with wooden spoons", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000260.jpg", "caption": "a woman pushing a cart with luggage at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524559.jpg", "caption": "a tennis player is about to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524557.jpg", "caption": "a group of kids playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262442.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262446.jpg", "caption": "a yellow sign has texts in the middle of a snow covered field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131400.jpg", "caption": "a train traveling down the tracks near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262492.jpg", "caption": "a southwest airlines plane on the runway", "annotations": [{"polygon": [[446, 194], [455, 201], [407, 264], [399, 256]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SCUTHWEST", "recog_valid": false, "glyph_recog_text": "SCUTIVEST", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524645.jpg", "caption": "a woman in a striped dress preparing food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524649.jpg", "caption": "a little girl is looking at the fridge", "annotations": [{"polygon": [[372, 347], [382, 365], [393, 384], [404, 393], [399, 401], [383, 388], [374, 372], [368, 357]], "text": "CATTLEMEN'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STEAAES", "recog_valid": false, "glyph_recog_text": "CATTLEMENS", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524651.jpg", "caption": "two women standing in the snow with ski poles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000368.jpg", "caption": "a young boy kicking a soccer ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131449.jpg", "caption": "two buses are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131450.jpg", "caption": "a person walking down a snowy street with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262529.jpg", "caption": "a man in a red shirt is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393602.jpg", "caption": "a train traveling down the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262528.jpg", "caption": "a baseball player jumping in the air to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524676.jpg", "caption": "a street sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393608.jpg", "caption": "a pizza with cheese and mushrooms", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131465.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262541.jpg", "caption": "a large jet airplane flying over a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393634.jpg", "caption": "a woman holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131509.jpg", "caption": "a window display of toys", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524736.jpg", "caption": "a group of people standing next to a bunch of surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000471.jpg", "caption": "a yellow school bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131547.jpg", "caption": "a toilet in a small bathroom with a picture on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000529.jpg", "caption": "a couple on a dirt bike at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262673.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000532.jpg", "caption": "a red and black bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000540.jpg", "caption": "a japanese airbus a380-800 at tokyo airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524830.jpg", "caption": "a man with a backpack standing at a crosswalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393762.jpg", "caption": "a black and white photo of a sign post", "annotations": [{"polygon": [[223, 223], [286, 191], [293, 202], [232, 235]], "text": "WHITWICK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WHITWICK", "recog_valid": true, "glyph_recog_text": "WHITWICK", "glyph_recog_ld": 1.0}, {"polygon": [[35, 303], [84, 278], [86, 293], [42, 316]], "text": "MOUNT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MOUNT", "recog_valid": true, "glyph_recog_text": "MOUNT", "glyph_recog_ld": 1.0}, {"polygon": [[89, 275], [125, 256], [127, 270], [94, 287], [89, 279]], "text": "SAINT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SAINT", "recog_valid": true, "glyph_recog_text": "SANT", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[102, 287], [142, 266], [144, 282], [106, 302]], "text": "ABBEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ABBEY", "recog_valid": true, "glyph_recog_text": "ABBEY", "glyph_recog_ld": 1.0}, {"polygon": [[71, 322], [74, 335], [81, 336], [111, 318], [106, 306]], "text": "OAKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OAKS", "recog_valid": true, "glyph_recog_text": "OAKS", "glyph_recog_ld": 1.0}, {"polygon": [[33, 324], [39, 337], [101, 304], [94, 292]], "text": "BERNARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BERNARD", "recog_valid": true, "glyph_recog_text": "BERNARO", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262704.jpg", "caption": "a notebook, a cell phone, a mp3 player, a camera, a pen, a wallet, a purse, a purse strap, a purse, a", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262707.jpg", "caption": "a young boy swinging a baseball bat on a baseball field", "annotations": [{"polygon": [[251, 288], [278, 293], [301, 301], [298, 340], [250, 333]], "text": "66", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "66", "recog_valid": true, "glyph_recog_text": "66", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262710.jpg", "caption": "a man standing next to a red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524866.jpg", "caption": "a laptop computer and a cell phone sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131666.jpg", "caption": "air france a380-800", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000605.jpg", "caption": "a cup of coffee and a pastry on a table", "annotations": [{"polygon": [[438, 238], [446, 237], [489, 289], [482, 292]], "text": "ROASTED", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ROASTED", "recog_valid": true, "glyph_recog_text": "码心h9te6", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[416, 243], [436, 238], [500, 320], [477, 325]], "text": "DROP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DROP", "recog_valid": true, "glyph_recog_text": "DROP", "glyph_recog_ld": 1.0}, {"polygon": [[398, 246], [414, 243], [473, 326], [456, 330]], "text": "COFFEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "COFFEE", "recog_valid": true, "glyph_recog_text": "COFFEE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131678.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131703.jpg", "caption": "two women playing tennis", "annotations": [{"polygon": [[9, 269], [16, 227], [51, 232], [46, 269]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "D", "recog_valid": true, "glyph_recog_text": "D", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524946.jpg", "caption": "a couple of people standing in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131731.jpg", "caption": "a black bear standing in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000524956.jpg", "caption": "a group of people sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000681.jpg", "caption": "a bus parked on a street with a sun setting behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393900.jpg", "caption": "a large airplane sitting on top of an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262851.jpg", "caption": "a parking meter with a credit card reader", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393943.jpg", "caption": "a train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393945.jpg", "caption": "a red motorcycle parked on a road in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000393954.jpg", "caption": "a man is speaking to a group of people in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262884.jpg", "caption": "a woman playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262893.jpg", "caption": "two boys in blue and yellow uniforms playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525039.jpg", "caption": "a man throwing a frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000753.jpg", "caption": "a giraffe standing in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262917.jpg", "caption": "a skier in the air", "annotations": [{"polygon": [[313, 167], [323, 166], [357, 216], [346, 219], [317, 179], [310, 168]], "text": "FISCHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EISCHER", "recog_valid": false, "glyph_recog_text": "FISCNER", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131865.jpg", "caption": "a baseball player in a uniform throwing a ball", "annotations": [{"polygon": [[187, 230], [223, 232], [222, 277], [188, 277]], "text": "LB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0Q", "recog_valid": false, "glyph_recog_text": "J", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000795.jpg", "caption": "people walking on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394019.jpg", "caption": "a hand holding a bagel", "annotations": [{"polygon": [[44, 341], [50, 296], [63, 267], [80, 242], [95, 227], [80, 210], [60, 224], [41, 248], [23, 288], [16, 317], [15, 350], [19, 355]], "text": "-VIATEUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NIATEUR", "recog_valid": false, "glyph_recog_text": "IATEUR", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[17, 366], [22, 388], [31, 408], [38, 410], [60, 397], [59, 384], [45, 356]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "LS", "recog_valid": false, "glyph_recog_text": "ST", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131902.jpg", "caption": "a white car with a digital clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000262976.jpg", "caption": "a bus driving down a dirt road with a dirt road behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000131952.jpg", "caption": "a white truck with a trailer attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263031.jpg", "caption": "a person riding a motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525176.jpg", "caption": "a computer desk with a keyboard, mouse, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525180.jpg", "caption": "a group of people working in a commercial kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263039.jpg", "caption": "two teddy bears sitting on top of wooden sticks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000000897.jpg", "caption": "a person laying in the snow on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525189.jpg", "caption": "a clock is mounted on the wall of a hallway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394126.jpg", "caption": "a street lined with trees and a speed limit sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525206.jpg", "caption": "a man in a yellow shirt is jumping on a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394139.jpg", "caption": "a bus driving down a street with cars parked on either side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132000.jpg", "caption": "a woman eating a banana", "annotations": [{"polygon": [[513, 137], [466, 148], [466, 168], [512, 159]], "text": "ShopR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ShgpR", "recog_valid": false, "glyph_recog_text": "ShopF", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[446, 184], [492, 177], [492, 211], [447, 216]], "text": "Peanut", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "feanut", "recog_valid": false, "glyph_recog_text": "Peanut", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132002.jpg", "caption": "a plastic container with rice and a cat on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263104.jpg", "caption": "a double decker bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263111.jpg", "caption": "a woman selling food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525264.jpg", "caption": "two pictures of a man playing tennis", "annotations": [{"polygon": [[109, 233], [109, 297], [209, 300], [207, 238]], "text": "e", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lse", "recog_valid": false, "glyph_recog_text": "e", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525273.jpg", "caption": "a laptop with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394209.jpg", "caption": "a man standing in front of an open refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394214.jpg", "caption": "a woman giving a thumbs up in front of a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525299.jpg", "caption": "a train traveling on tracks near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394229.jpg", "caption": "a bench sitting under a tree in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001014.jpg", "caption": "two men standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132099.jpg", "caption": "a cat wearing a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132106.jpg", "caption": "a plate with chicken and broccoli on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132114.jpg", "caption": "a young boy laying on the ground with his skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132123.jpg", "caption": "a stop sign and a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263204.jpg", "caption": "a stop sign with a sticker on it", "annotations": [{"polygon": [[175, 81], [218, 69], [218, 92], [174, 102]], "text": "BLVD.", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BLVD", "recog_valid": false, "glyph_recog_text": "BLVD.", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[126, 93], [164, 83], [166, 104], [127, 114]], "text": "PICO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PICO", "recog_valid": true, "glyph_recog_text": "PICO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394283.jpg", "caption": "a woman in a black jacket is mixing something in a large pot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394286.jpg", "caption": "a woman in a blue skirt and white shirt is hitting a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001090.jpg", "caption": "a train traveling through a city with a mountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525380.jpg", "caption": "a delta airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525398.jpg", "caption": "a train car with a sign that says detroit on it", "annotations": [{"polygon": [[283, 238], [282, 271], [320, 278], [322, 243]], "text": "Grande", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "toke", "recog_valid": false, "glyph_recog_text": "Graode", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132193.jpg", "caption": "a clock on a pole in a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132196.jpg", "caption": "a man sitting at a table with boxes and a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263270.jpg", "caption": "people walking on the side of a train track near a village", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525432.jpg", "caption": "a banana, cucumber, and a peach on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132217.jpg", "caption": "a woman on skis is running down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525438.jpg", "caption": "a man walking past a store with a sign that says clear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001168.jpg", "caption": "a red airplane hanging from the ceiling", "annotations": [{"polygon": [[84, 147], [78, 165], [124, 178], [126, 164]], "text": "NIIDR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NIIDR", "recog_valid": true, "glyph_recog_text": "NIDR", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132262.jpg", "caption": "a note left on a parking meter in the city of london", "annotations": [{"polygon": [[93, 216], [142, 247], [143, 282], [95, 253]], "text": "C-524", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CA", "recog_valid": false, "glyph_recog_text": "C-524", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[272, 332], [341, 315], [339, 300], [271, 317]], "text": "would", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cutuf", "recog_valid": false, "glyph_recog_text": "wauld", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[354, 349], [419, 334], [417, 316], [356, 333]], "text": "minutes", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "unube", "recog_valid": false, "glyph_recog_text": "minutes", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[268, 337], [364, 311], [362, 331], [304, 357], [266, 349]], "text": "regular", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": " g c", "recog_valid": false, "glyph_recog_text": "regular", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394415.jpg", "caption": "a baseball player walking on a baseball field", "annotations": [{"polygon": [[237, 255], [239, 282], [304, 273], [305, 251]], "text": "Twins", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tuins", "recog_valid": false, "glyph_recog_text": "Twins", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132290.jpg", "caption": "a man on a motorcycle is parked in front of an old building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394436.jpg", "caption": "a blue and orange train engine sitting on the tracks", "annotations": [{"polygon": [[245, 242], [254, 238], [270, 241], [275, 245], [280, 266], [273, 271], [255, 270], [248, 266]], "text": "PNR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PR", "recog_valid": false, "glyph_recog_text": "R", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525510.jpg", "caption": "a car driving down a street with traffic lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001224.jpg", "caption": "two giraffes eating hay from a basket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394447.jpg", "caption": "man holding a hot dog", "annotations": [{"polygon": [[72, 1], [71, 21], [135, 34], [134, 5], [114, 0]], "text": "ORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ORT", "recog_valid": true, "glyph_recog_text": "ORT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263385.jpg", "caption": "a man in glasses holding a remote control", "annotations": [{"polygon": [[96, 436], [101, 422], [31, 405], [29, 420]], "text": "SONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "SONY", "recog_valid": true, "glyph_recog_text": "SONY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525533.jpg", "caption": "a man in a wheelchair eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525539.jpg", "caption": "a motorcycle is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263400.jpg", "caption": "a bench and a sign in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525555.jpg", "caption": "a beach umbrella and two lounge chairs on a sandy beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525567.jpg", "caption": "a woman riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525572.jpg", "caption": "a woman in a green hat is playing tennis", "annotations": [{"polygon": [[0, 215], [0, 268], [69, 269], [70, 230]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "6", "recog_valid": false, "glyph_recog_text": "Eniates", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[83, 221], [76, 269], [166, 271], [160, 223]], "text": "Airline", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Aidies", "recog_valid": false, "glyph_recog_text": "Airline", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525580.jpg", "caption": "a woman swinging a bat at a man in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263440.jpg", "caption": "a little girl sitting on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394518.jpg", "caption": "a cross country skier is racing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001311.jpg", "caption": "a delta airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394529.jpg", "caption": "a woman in a white coat and knee high socks is walking down the street", "annotations": [{"polygon": [[389, 110], [386, 127], [432, 116], [435, 97]], "text": "Panini", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Fanini", "recog_valid": false, "glyph_recog_text": "Panini", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263458.jpg", "caption": "a small bird sitting on top of a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263462.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132395.jpg", "caption": "a man on a skateboard in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132405.jpg", "caption": "a cow grazing in a field with a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263479.jpg", "caption": "a highway with construction cones and traffic cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132412.jpg", "caption": "two trains are parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263493.jpg", "caption": "a dog and a cat looking at each other in a bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525640.jpg", "caption": "a banana with a string attached to it", "annotations": [{"polygon": [[456, 377], [486, 414], [498, 398], [474, 366]], "text": "Otis", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "O+e", "recog_valid": false, "glyph_recog_text": "Otts", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001381.jpg", "caption": "a man driving a tractor with a herd of sheep", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001386.jpg", "caption": "a tray with a sandwich and fries on it", "annotations": [{"polygon": [[236, 168], [237, 186], [304, 156], [306, 143]], "text": "DAST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "(DAST", "recog_valid": false, "glyph_recog_text": "DAST", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[232, 144], [233, 161], [313, 135], [310, 123]], "text": "PRIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PRIME", "recog_valid": true, "glyph_recog_text": "PRIME", "glyph_recog_ld": 1.0}, {"polygon": [[199, 132], [201, 148], [261, 125], [261, 114]], "text": "EDD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EDL", "recog_valid": false, "glyph_recog_text": "EDO", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[382, 166], [376, 186], [394, 198], [417, 206], [444, 205], [452, 203], [460, 181], [446, 186], [410, 183], [389, 173]], "text": "PEPSI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PEPS", "recog_valid": false, "glyph_recog_text": "PEPSI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525675.jpg", "caption": "a tennis player is swinging his racket at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525678.jpg", "caption": "a man and a woman posing with their luggage", "annotations": [{"polygon": [[78, 248], [201, 234], [199, 251], [93, 259], [93, 263], [89, 264], [86, 260], [82, 261], [78, 259], [77, 255]], "text": "TAW", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "QL。TAW/", "recog_valid": false, "glyph_recog_text": "TAW", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001408.jpg", "caption": "a large airplane sitting on top of an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132495.jpg", "caption": "a man on a skateboard doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263576.jpg", "caption": "three people holding cell phones and one is using a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263589.jpg", "caption": "a restaurant with a sign that says open 24 hours", "annotations": [{"polygon": [[352, 280], [353, 325], [428, 324], [424, 283]], "text": "Chicagos", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cngoi", "recog_valid": false, "glyph_recog_text": "Chicagos", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132521.jpg", "caption": "a street with a stop sign and a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263599.jpg", "caption": "a china airlines airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263604.jpg", "caption": "a window display with two mannequins in lingerie and tennis balls", "annotations": [{"polygon": [[80, 144], [80, 110], [212, 109], [214, 144]], "text": "ANYONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ANYOINE", "recog_valid": false, "glyph_recog_text": "ANYONE", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[377, 243], [439, 243], [439, 279], [379, 279]], "text": "FOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FOR", "recog_valid": true, "glyph_recog_text": "FOR", "glyph_recog_ld": 1.0}, {"polygon": [[108, 417], [109, 454], [229, 453], [228, 417]], "text": "TENNISE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TENINIS", "recog_valid": false, "glyph_recog_text": "TENNISE", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132548.jpg", "caption": "a street sign and a traffic light in new york city", "annotations": [{"polygon": [[137, 164], [134, 172], [185, 216], [188, 209]], "text": "CHEVROLET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHEVROLET", "recog_valid": true, "glyph_recog_text": "PiHnvheLXr", "glyph_recog_ld": 0.20000079999919995}, {"polygon": [[86, 106], [86, 112], [128, 149], [131, 143]], "text": "TOSHIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TOSHIE", "recog_valid": true, "glyph_recog_text": "ty9.i3", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263639.jpg", "caption": "a train pulling into a station with a yellow and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132591.jpg", "caption": "a man and two boys standing on the beach with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394739.jpg", "caption": "a street sign with a hand gesture on it", "annotations": [{"polygon": [[231, 89], [232, 93], [273, 66], [271, 62]], "text": "BECKINELLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CTCKINELL", "recog_valid": false, "glyph_recog_text": "P\"tt", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001526.jpg", "caption": "a microwave and a small refrigerator sitting next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001536.jpg", "caption": "a pizza on a pan on a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263703.jpg", "caption": "a woman looking at a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394776.jpg", "caption": "a small red car is stopped at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394783.jpg", "caption": "a stop sign with a bicycle on it", "annotations": [{"polygon": [[206, 215], [199, 248], [318, 276], [324, 247]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[197, 266], [199, 283], [248, 296], [249, 279]], "text": "WHEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WHEN", "recog_valid": true, "glyph_recog_text": "WHEN", "glyph_recog_ld": 1.0}, {"polygon": [[260, 281], [260, 298], [320, 312], [320, 296]], "text": "LIGHTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIGHTS", "recog_valid": true, "glyph_recog_text": "LUGHTS", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[239, 301], [238, 319], [327, 339], [326, 322]], "text": "FLASHING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLASHING", "recog_valid": true, "glyph_recog_text": "FLASHING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263714.jpg", "caption": "a bed with a red and white pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001586.jpg", "caption": "two women sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132668.jpg", "caption": "two cakes with candles on them sitting on plates", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000525913.jpg", "caption": "black and white photo of a bedroom with a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394866.jpg", "caption": "a green clock on a building with a light on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394879.jpg", "caption": "a man and a woman standing in front of a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132760.jpg", "caption": "a woman walking a horse in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394905.jpg", "caption": "a boy on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001688.jpg", "caption": "a dog sitting on a chair with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132768.jpg", "caption": "a laptop computer screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132773.jpg", "caption": "two airplanes are parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263845.jpg", "caption": "a table with flowers in vases on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001712.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001720.jpg", "caption": "a blue bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263883.jpg", "caption": "a traffic light on a street with cars driving by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394970.jpg", "caption": "a yellow bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526043.jpg", "caption": "a british airways plane parked at the gate of an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001764.jpg", "caption": "a tennis player is playing a game of tennis", "annotations": [{"polygon": [[395, 351], [364, 349], [367, 327], [381, 294], [402, 295]], "text": "MELBOURNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "auinoqjew", "recog_valid": false, "glyph_recog_text": "浙街山园司", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001771.jpg", "caption": "a bedroom with a bed, desk and chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001774.jpg", "caption": "a man riding a skateboard in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000394992.jpg", "caption": "a group of young women are posing for a photo", "annotations": [{"polygon": [[17, 38], [17, 38], [50, 38], [50, 73], [17, 73]], "text": "G", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "G", "recog_valid": true, "glyph_recog_text": "G", "glyph_recog_ld": 1.0}, {"polygon": [[65, 71], [98, 72], [81, 37]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "T", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[127, 37], [112, 72], [144, 73]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "y", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[167, 38], [174, 74], [351, 72], [348, 37]], "text": "TENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TENNIS", "recog_valid": true, "glyph_recog_text": "TENNIS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395005.jpg", "caption": "a man in armor talking on a cell phone", "annotations": [{"polygon": [[63, 233], [91, 248], [99, 255], [90, 266], [64, 252]], "text": "5,", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "15、", "recog_valid": false, "glyph_recog_text": "5.", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132874.jpg", "caption": "two hot dogs on a bun", "annotations": [{"polygon": [[91, 109], [105, 106], [120, 103], [132, 98], [144, 92], [147, 108], [136, 114], [123, 119], [109, 122], [94, 125]], "text": "FRED", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "FRED", "recog_valid": true, "glyph_recog_text": "FRED、", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[94, 126], [109, 124], [124, 120], [141, 112], [151, 104], [156, 95], [159, 111], [153, 119], [146, 126], [135, 132], [121, 137], [99, 142]], "text": "EVERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "EVERT", "recog_valid": false, "glyph_recog_text": "EVERY", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132878.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001811.jpg", "caption": "a man on a skateboard doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132883.jpg", "caption": "a street sign with graffiti on it", "annotations": [{"polygon": [[272, 242], [396, 244], [395, 278], [270, 273]], "text": "odobrenjem", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "odobrenjem", "recog_valid": true, "glyph_recog_text": "odobrenjenr", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[346, 103], [360, 149], [402, 166], [407, 129], [382, 113]], "text": "TRY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TPU", "recog_valid": false, "glyph_recog_text": "TRY", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[248, 133], [264, 169], [350, 162], [346, 127], [307, 115]], "text": "LINE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AQ", "recog_valid": false, "glyph_recog_text": "LINE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395030.jpg", "caption": "a lunch box with vegetables and a hello kitty", "annotations": [{"polygon": [[55, 174], [123, 115], [118, 111], [115, 107], [112, 107], [44, 166]], "text": "HELLO KITTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HELLOEITTY", "recog_valid": false, "glyph_recog_text": "HELLOKIRTY", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395040.jpg", "caption": "a person in a red and white outfit skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395050.jpg", "caption": "a cow and her calf in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132908.jpg", "caption": "a bus parked on the side of the road", "annotations": [{"polygon": [[268, 239], [379, 254], [378, 278], [268, 270]], "text": "LEROY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEROY", "recog_valid": true, "glyph_recog_text": "LEROY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132913.jpg", "caption": "a surfer riding a wave in the ocean", "annotations": [{"polygon": [[268, 185], [491, 188], [490, 217], [265, 206]], "text": "Pabelinho@Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fabelinhe Ghotegraply", "recog_valid": false, "glyph_recog_text": "abelinho@Photograph", "glyph_recog_ld": 0.6666668253967498}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263993.jpg", "caption": "a ship in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000263995.jpg", "caption": "a group of motorcycles parked in a line on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001877.jpg", "caption": "a train on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132954.jpg", "caption": "two cyclists ride in the rain on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395101.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001902.jpg", "caption": "a bed with a white comforter and a yellow and blue floral blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001915.jpg", "caption": "a man on a motorcycle with a dog on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001924.jpg", "caption": "a street sign with a yellow sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264076.jpg", "caption": "a man riding a skateboard down a set of steps", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526227.jpg", "caption": "a bed with a white comforter and a blue pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133011.jpg", "caption": "a skateboarder is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001941.jpg", "caption": "an old black and white photo of a street with horse drawn carriages", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264091.jpg", "caption": "a man walking on the beach with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001958.jpg", "caption": "two women playing a video game in a living room", "annotations": [{"polygon": [[500, 384], [467, 406], [496, 443], [512, 429], [512, 398]], "text": "t", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395192.jpg", "caption": "a hot dog on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133048.jpg", "caption": "a side view mirror on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395198.jpg", "caption": "a woman riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001997.jpg", "caption": "rockfeller plaza - new york city", "annotations": [{"polygon": [[160, 153], [386, 185], [387, 217], [158, 183]], "text": "ROCKEFELLER PLAZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROCKEFELLER", "recog_valid": false, "glyph_recog_text": "ROCKEFELLER PLAZA", "glyph_recog_ld": 0.6470590311417463}, {"polygon": [[98, 236], [195, 232], [195, 267], [98, 273]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[2, 240], [40, 240], [39, 274], [1, 275]], "text": "IE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IE", "recog_valid": true, "glyph_recog_text": "IE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395216.jpg", "caption": "a group of people riding skateboards down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526291.jpg", "caption": "a street with a lot of people walking on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264151.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395241.jpg", "caption": "two people standing next to a tree in a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395242.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395263.jpg", "caption": "a rafael f-16 fighter jet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395269.jpg", "caption": "a bunch of brown bread shaped like teddy bears", "annotations": [{"polygon": [[179, 78], [172, 84], [181, 94], [201, 112], [207, 104]], "text": "HOKUO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HOKUO", "recog_valid": true, "glyph_recog_text": "HOao", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526347.jpg", "caption": "a man riding a skateboard", "annotations": [{"polygon": [[64, 341], [131, 326], [200, 362], [159, 408], [79, 395]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "4", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264214.jpg", "caption": "two motorcyclists racing down a winding road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133145.jpg", "caption": "a fire truck with an american flag on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526362.jpg", "caption": "a woman walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002083.jpg", "caption": "a woman standing in a kitchen with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264236.jpg", "caption": "a suitcase with clothes and a blanket on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264241.jpg", "caption": "a group of people standing on a street with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395314.jpg", "caption": "a yellow bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133175.jpg", "caption": "a bathroom with a sink, mirror and towel rack", "annotations": [{"polygon": [[378, 432], [418, 467], [409, 468], [367, 439]], "text": "Colgate", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Caare", "recog_valid": false, "glyph_recog_text": "Coigaie", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133183.jpg", "caption": "a building with a clock on the front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395339.jpg", "caption": "a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395340.jpg", "caption": "a pirate ship is traveling down the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002135.jpg", "caption": "a woman sitting on a bench under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395365.jpg", "caption": "a young boy is standing on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526441.jpg", "caption": "a bus driving down a street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133229.jpg", "caption": "two women sitting at a desk with a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526445.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264320.jpg", "caption": "a plate with a slice of pizza and a salad on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264322.jpg", "caption": "a man in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264350.jpg", "caption": "a man running on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264356.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264358.jpg", "caption": "a man taking a picture of hot dogs with faces on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264362.jpg", "caption": "a plate of cheese, meat and other food items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264371.jpg", "caption": "a close up of a toothbrush and toothpaste", "annotations": [{"polygon": [[260, 373], [268, 402], [352, 411], [349, 378]], "text": "Oral-B", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OralB", "recog_valid": false, "glyph_recog_text": "Oral-B", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264395.jpg", "caption": "a baby boy sitting on the floor eating an orange", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395469.jpg", "caption": "a bench with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395473.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264406.jpg", "caption": "a group of cell phones and a game console", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526554.jpg", "caption": "a person sitting on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002278.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395498.jpg", "caption": "a person taking a picture of a stop sign", "annotations": [{"polygon": [[400, 195], [401, 230], [327, 228], [327, 190]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395504.jpg", "caption": "a man in a black shirt is moving a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264456.jpg", "caption": "a traffic light on a pole with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133403.jpg", "caption": "a man standing next to a bicycle with a basket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002337.jpg", "caption": "a dog wearing a shirt on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395560.jpg", "caption": "george brown and horse cart at the entrance to middletown p", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264495.jpg", "caption": "a man riding a skateboard down a rail", "annotations": [{"polygon": [[245, 38], [249, 65], [402, 53], [387, 20], [353, 17], [298, 28]], "text": "RiTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "R°丁益", "recog_valid": false, "glyph_recog_text": "RiTA", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[198, 75], [218, 157], [331, 245], [389, 245], [392, 58]], "text": "Rita", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Rita", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264514.jpg", "caption": "a dog and a cat are sitting on a pile of blankets", "annotations": [{"polygon": [[162, 282], [162, 282], [163, 338], [238, 335], [240, 361], [247, 361], [248, 334], [263, 333], [262, 308], [232, 308], [228, 283]], "text": "Help", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Help", "recog_valid": true, "glyph_recog_text": "Help", "glyph_recog_ld": 1.0}, {"polygon": [[17, 284], [12, 347], [53, 345], [148, 335], [144, 310], [54, 316], [50, 283]], "text": "please", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Please", "recog_valid": false, "glyph_recog_text": "please", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[12, 369], [12, 390], [58, 386], [56, 376], [50, 361], [40, 360], [28, 368]], "text": "with", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "with", "recog_valid": true, "glyph_recog_text": "with", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526681.jpg", "caption": "a young boy eating a donut at a table", "annotations": [{"polygon": [[198, 270], [201, 285], [248, 267], [245, 254], [204, 266]], "text": "SEARCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SEARCH", "recog_valid": true, "glyph_recog_text": "SEARCH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395614.jpg", "caption": "a red train is stopped at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526695.jpg", "caption": "a bed with a white sheet and a white pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526697.jpg", "caption": "a rack of surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133482.jpg", "caption": "a man in a red uniform on a horse with a canadian flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526701.jpg", "caption": "a model of a traffic sign with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133492.jpg", "caption": "a group of people posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133510.jpg", "caption": "a baseball game on tv with a batter and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002446.jpg", "caption": "a boy kicking a soccer ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002448.jpg", "caption": "a kitchen with a refrigerator, stove and two chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002445.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002471.jpg", "caption": "a cat standing on a table next to a bottle", "annotations": [{"polygon": [[124, 319], [125, 337], [170, 323], [170, 301]], "text": "COCA COLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cang", "recog_valid": false, "glyph_recog_text": "OKA COA", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[68, 443], [91, 481], [113, 476], [85, 434]], "text": "ONLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ONLY", "recog_valid": true, "glyph_recog_text": "ONLY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264618.jpg", "caption": "a kitchen counter with a toaster, coffee maker, and other appliances", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264620.jpg", "caption": "a bathroom with a sink, toilet and a bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526769.jpg", "caption": "a man in a suit and tie riding a motorcycle", "annotations": [{"polygon": [[0, 137], [1, 203], [110, 196], [110, 139]], "text": "EEP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EEP", "recog_valid": true, "glyph_recog_text": "EEP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395703.jpg", "caption": "a yellow and blue water pump sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395716.jpg", "caption": "a group of people standing on a snow covered slope", "annotations": [{"polygon": [[139, 430], [193, 447], [186, 470], [131, 455]], "text": "RIDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FEE", "recog_valid": false, "glyph_recog_text": "RIDE", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526797.jpg", "caption": "a silver train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395742.jpg", "caption": "a poster of two women's soccer players on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002536.jpg", "caption": "a group of people in the water with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002543.jpg", "caption": "two children laying on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002545.jpg", "caption": "a train traveling down the tracks with a yellow and black train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395765.jpg", "caption": "a bus drives through a snow covered road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264695.jpg", "caption": "a dog running on the beach with a ball in its mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395768.jpg", "caption": "a baseball player is standing at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002560.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[261, 151], [253, 179], [294, 172], [297, 143]], "text": "AEG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AEG", "recog_valid": true, "glyph_recog_text": "AEG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002570.jpg", "caption": "a donut with orange sprinkles and black icing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002575.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395791.jpg", "caption": "a lunch box with fruit, vegetables and a tortilla", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002583.jpg", "caption": "a man on a snowboard", "annotations": [{"polygon": [[105, 235], [116, 280], [176, 285], [163, 246]], "text": "RS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sa", "recog_valid": false, "glyph_recog_text": "RS", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526877.jpg", "caption": "a row of motorcycles parked on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002591.jpg", "caption": "a display case with many clocks on display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395828.jpg", "caption": "a red caboose sitting on the tracks next to a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133688.jpg", "caption": "a train car on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395840.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526922.jpg", "caption": "a group of buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395853.jpg", "caption": "a group of people posing for a picture with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002639.jpg", "caption": "two double decker buses are driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002644.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002645.jpg", "caption": "a hot dog on a bun", "annotations": [{"polygon": [[114, 216], [123, 219], [116, 229], [112, 237], [107, 245], [104, 249], [100, 250], [93, 246], [99, 238], [108, 224]], "text": "SPECIAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "7VI38S", "recog_valid": false, "glyph_recog_text": "BAECN", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133727.jpg", "caption": "a baseball player is swinging a bat", "annotations": [{"polygon": [[169, 126], [181, 132], [157, 166], [146, 158]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133731.jpg", "caption": "a cow is standing in a field with other cows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264806.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526953.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[305, 84], [421, 85], [420, 164], [304, 164]], "text": "Sydney", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Dydnsg", "recog_valid": false, "glyph_recog_text": "Sydney", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[71, 95], [115, 96], [116, 141], [72, 140]], "text": "ing", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ing", "recog_valid": true, "glyph_recog_text": "i", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[72, 147], [108, 147], [110, 206], [72, 206]], "text": "ley", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "西", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395888.jpg", "caption": "a woman is preparing carrots in a pot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264823.jpg", "caption": "a tray of cupcakes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002687.jpg", "caption": "a traffic jam on a busy street with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526987.jpg", "caption": "a man with glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395920.jpg", "caption": "a bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527012.jpg", "caption": "a man is working on a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395957.jpg", "caption": "a green and red train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395966.jpg", "caption": "a man is standing on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002758.jpg", "caption": "a skier is in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264923.jpg", "caption": "a bicycle is parked in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264926.jpg", "caption": "a group of people standing on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527073.jpg", "caption": "a red wagon with teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527082.jpg", "caption": "two motorcyclists racing on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527096.jpg", "caption": "a person's hand is on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396039.jpg", "caption": "a cat and dog playing with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527112.jpg", "caption": "a man is riding a bike", "annotations": [{"polygon": [[176, 193], [184, 190], [190, 196], [200, 206], [211, 216], [205, 221], [198, 214], [187, 204], [181, 199]], "text": "CENTURIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CENTURIES", "recog_valid": true, "glyph_recog_text": "srrdyera", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[271, 74], [257, 192], [277, 191], [288, 69]], "text": "TAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Far", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527118.jpg", "caption": "a sign that says it's a minute walk to square", "annotations": [{"polygon": [[353, 206], [353, 221], [414, 205], [413, 187]], "text": "IS A MINUTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MINUTE", "recog_valid": false, "glyph_recog_text": "IS A MINUTE", "glyph_recog_ld": 0.5454549586773103}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133908.jpg", "caption": "a fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133912.jpg", "caption": "a man sitting in a train car with a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002842.jpg", "caption": "a man and a woman on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264987.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527134.jpg", "caption": "a yellow bus driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527139.jpg", "caption": "a couple of vases with flowers sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396080.jpg", "caption": "a traffic light and a sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265010.jpg", "caption": "a man in a kitchen preparing food in a commercial kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527155.jpg", "caption": "a basket of tomatoes on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133940.jpg", "caption": "a woman blowing out candles on a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002886.jpg", "caption": "a plate of food with potatoes, carrots, and meat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002902.jpg", "caption": "a banana, an apple, and a pear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133994.jpg", "caption": "a train is parked at a station with a platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396143.jpg", "caption": "a woman in a bikini standing on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265080.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[23, 218], [20, 197], [61, 187], [66, 203]], "text": "Roualo", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Rouala", "recog_valid": false, "glyph_recog_text": "Frualo", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265106.jpg", "caption": "a space shuttle and a large airplane flying together", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002982.jpg", "caption": "a train on a track with a bridge over it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265135.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134071.jpg", "caption": "a black and white photo of a biplane on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002998.jpg", "caption": "a busy city street with many signs and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003003.jpg", "caption": "three trains on the tracks near a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396223.jpg", "caption": "a man with hairy chest and a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265168.jpg", "caption": "a white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134100.jpg", "caption": "three baseball players pose for a photo in a dugout", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134113.jpg", "caption": "a narrow street with a car parked in the middle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396257.jpg", "caption": "a group of people standing near a green bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265196.jpg", "caption": "a dog sitting at a table eating out of a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265207.jpg", "caption": "a street sign that says day ln", "annotations": [{"polygon": [[154, 261], [160, 194], [290, 212], [266, 276]], "text": "DAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DAY", "recog_valid": true, "glyph_recog_text": "DAY", "glyph_recog_ld": 1.0}, {"polygon": [[299, 215], [342, 221], [339, 257], [298, 251]], "text": "LN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LN", "recog_valid": true, "glyph_recog_text": "LN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265211.jpg", "caption": "a street sign that says cross country", "annotations": [{"polygon": [[299, 210], [300, 240], [366, 237], [365, 208]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396287.jpg", "caption": "hawaiian street food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134144.jpg", "caption": "a wooden bowl filled with hot dogs and chicken", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396326.jpg", "caption": "a cat sitting on a bag on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396330.jpg", "caption": "three men standing in a gym", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265274.jpg", "caption": "two men sitting in a kitchen", "annotations": [{"polygon": [[356, 388], [371, 384], [390, 380], [395, 379], [399, 378], [402, 378], [409, 406], [400, 403], [395, 402], [385, 402], [374, 405], [360, 413]], "text": "ADIDAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ADIDAS", "recog_valid": true, "glyph_recog_text": "ADIDAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003160.jpg", "caption": "a group of people riding horses down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134236.jpg", "caption": "a street sign with a church in the background", "annotations": [{"polygon": [[206, 193], [209, 183], [261, 215], [256, 227]], "text": "SHAMBLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shambles", "recog_valid": false, "glyph_recog_text": "SHAMELES", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265333.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527477.jpg", "caption": "a large passenger jet on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396412.jpg", "caption": "a man on a motorcycle is riding down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134268.jpg", "caption": "a statue of a man with a clock on his head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396415.jpg", "caption": "a truck route sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396418.jpg", "caption": "a black and white photo of a baseball player", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265351.jpg", "caption": "a blue bus driving down a street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003219.jpg", "caption": "a table with various electronic devices on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134294.jpg", "caption": "a white dresser with a microwave and a red hat on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003220.jpg", "caption": "a baseball game with people in the stands", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396455.jpg", "caption": "a person holding a cat and a remote control", "annotations": [{"polygon": [[173, 372], [211, 353], [217, 364], [184, 387]], "text": "ern's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "erns", "recog_valid": false, "glyph_recog_text": "ern's", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[197, 405], [204, 414], [234, 395], [224, 383]], "text": "ineapple", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "reapple", "recog_valid": false, "glyph_recog_text": "ine sapuphs", "glyph_recog_ld": 0.3636369421482344}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527554.jpg", "caption": "a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527557.jpg", "caption": "an old airplane in a garage with a car parked next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003272.jpg", "caption": "a man holding a rainbow umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003286.jpg", "caption": "a cell phone sitting on a table next to a glass of water", "annotations": [{"polygon": [[140, 10], [144, 81], [309, 83], [305, 14]], "text": "LIFELINES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Lifelines", "recog_valid": false, "glyph_recog_text": "LIFELINES", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396528.jpg", "caption": "a cake decorated with people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003314.jpg", "caption": "a large clock on a stone building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396534.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[375, 382], [364, 423], [367, 426], [393, 427], [401, 414], [402, 385], [394, 379]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "G", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527615.jpg", "caption": "air france airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527624.jpg", "caption": "a horse wearing a red blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003348.jpg", "caption": "two men holding stuffed animals and a sign that says it's friday", "annotations": [{"polygon": [[194, 296], [263, 290], [299, 294], [299, 294], [301, 305], [197, 328]], "text": "@LinkHumans", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "@LincHirans", "recog_valid": false, "glyph_recog_text": "@LinkHumans", "glyph_recog_ld": 0.7272729752063862}, {"polygon": [[115, 336], [156, 327], [176, 327], [191, 349], [134, 388]], "text": "It's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "lt's", "recog_valid": false, "glyph_recog_text": "It's", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[161, 410], [161, 410], [150, 384], [173, 360], [249, 359], [255, 378]], "text": "Friday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Friday", "recog_valid": true, "glyph_recog_text": "Friday", "glyph_recog_ld": 1.0}, {"polygon": [[191, 421], [219, 407], [230, 417], [194, 439]], "text": "great", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "greal", "recog_valid": false, "glyph_recog_text": "great", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003353.jpg", "caption": "a cat sleeping on a person's lap", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527649.jpg", "caption": "a woman sitting at a desk talking on the phone", "annotations": [{"polygon": [[463, 226], [466, 244], [475, 263], [493, 258], [476, 218], [468, 221]], "text": "Coke", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "32400", "recog_valid": false, "glyph_recog_text": "Coke", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003361.jpg", "caption": "a dog is holding a frisbee", "annotations": [{"polygon": [[365, 371], [304, 405], [299, 400], [323, 385], [342, 375], [362, 366]], "text": "ACTIVE2010", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OLOZANN", "recog_valid": false, "glyph_recog_text": "As:vt2Ai", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527656.jpg", "caption": "trader vics los angeles", "annotations": [{"polygon": [[112, 206], [134, 230], [173, 223], [208, 221], [278, 216], [314, 215], [301, 192], [259, 195], [212, 196], [177, 197], [145, 199]], "text": "TRADER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRADER", "recog_valid": true, "glyph_recog_text": "TRADER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527660.jpg", "caption": "two pictures of a stuffed bear wearing a sweater", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134454.jpg", "caption": "a baseball player swinging at a ball during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265532.jpg", "caption": "a group of people standing in a large airport terminal", "annotations": [{"polygon": [[29, 64], [49, 67], [62, 70], [85, 73], [100, 73], [103, 97], [94, 98], [91, 87], [83, 87], [74, 85], [68, 82], [64, 80], [60, 80], [50, 80], [41, 83], [38, 85], [38, 91], [28, 89], [27, 72], [27, 66]], "text": "SPORTSMANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "SPORSMANS", "recog_valid": false, "glyph_recog_text": "SPORTSMANS", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003389.jpg", "caption": "two brown and red birds sitting on a wooden bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396611.jpg", "caption": "a man with a suitcase on an escalator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265553.jpg", "caption": "a young boy is holding a baseball bat", "annotations": [{"polygon": [[253, 165], [253, 165], [264, 163], [270, 162], [284, 164], [288, 197], [288, 197], [287, 199], [270, 203], [253, 201]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265557.jpg", "caption": "a bus with a sign on it that says free shuttle buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134490.jpg", "caption": "a boy and his dog in a green truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396638.jpg", "caption": "a horse race track with people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396663.jpg", "caption": "a red and white bus parked next to a yellow bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265594.jpg", "caption": "a man holding a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134532.jpg", "caption": "a keyboard and mouse on a desk", "annotations": [{"polygon": [[398, 318], [433, 293], [441, 307], [406, 330]], "text": "evergude", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EVERGLIDE", "recog_valid": false, "glyph_recog_text": "evergutte", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527748.jpg", "caption": "a person holding a cell phone with a text message", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003483.jpg", "caption": "a small computer with a keyboard and screen on it", "annotations": [{"polygon": [[137, 156], [137, 156], [136, 190], [150, 184], [186, 178], [230, 173], [230, 155], [221, 154], [212, 154], [184, 154], [177, 155], [158, 155], [143, 156]], "text": "PRINCE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PRInce", "recog_valid": false, "glyph_recog_text": "PRINCE", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[247, 156], [246, 176], [252, 174], [297, 180], [339, 189], [338, 157], [334, 155], [315, 156], [304, 155], [288, 156], [273, 155]], "text": "PERSIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PERSla", "recog_valid": false, "glyph_recog_text": "PERSIA", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003493.jpg", "caption": "a street with many signs and cars on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003511.jpg", "caption": "a pizza box on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134586.jpg", "caption": "a collage of pictures of people and things", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527816.jpg", "caption": "a large jetliner flying in the sky", "annotations": [{"polygon": [[291, 172], [291, 185], [361, 156], [359, 141], [291, 172]], "text": "transat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "traisat", "recog_valid": false, "glyph_recog_text": "transat", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003532.jpg", "caption": "two women playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003535.jpg", "caption": "a man is working on a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527830.jpg", "caption": "a car parked in a parking lot at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527834.jpg", "caption": "a young boy in a blue shirt eating a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134620.jpg", "caption": "a bus driving down a street with trees and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134622.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396775.jpg", "caption": "a young boy holding an umbrella in the rain", "annotations": [{"polygon": [[300, 471], [348, 460], [343, 437], [299, 450], [298, 462]], "text": "CLUB", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CIUB", "recog_valid": false, "glyph_recog_text": "CLUB", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396825.jpg", "caption": "two pictures of people and animals on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527907.jpg", "caption": "a group of people sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265781.jpg", "caption": "a woman standing next to a pink food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134717.jpg", "caption": "a train traveling down the tracks near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265814.jpg", "caption": "a bus driving down a street with cars parked on either side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134752.jpg", "caption": "a cake on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003685.jpg", "caption": "a desk with a computer, books and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003692.jpg", "caption": "a street sign on a building in edinburgh", "annotations": [{"polygon": [[240, 172], [238, 183], [239, 191], [341, 209], [345, 191]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396907.jpg", "caption": "a street sign with a car driving under it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528006.jpg", "caption": "a small airplane flying in the sky with its propellers up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003729.jpg", "caption": "a cat is sitting in a suitcase on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265889.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134818.jpg", "caption": "a box of donuts", "annotations": [{"polygon": [[155, 157], [165, 151], [193, 140], [207, 137], [233, 135], [233, 149], [219, 149], [210, 150], [199, 152], [186, 157], [169, 164], [164, 169]], "text": "Certified", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Certified", "recog_valid": true, "glyph_recog_text": "Certified", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134829.jpg", "caption": "a person holding a frisbee", "annotations": [{"polygon": [[268, 169], [277, 163], [319, 245], [311, 253]], "text": "INNOVA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AWO", "recog_valid": false, "glyph_recog_text": "IkNOVA", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396977.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134835.jpg", "caption": "a bathroom with a toilet and a cabinet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396998.jpg", "caption": "a toilet in a bathroom with a sign on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265933.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[94, 182], [275, 164], [275, 189], [268, 192], [96, 208], [91, 202], [92, 183]], "text": "LEXUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEXLS", "recog_valid": false, "glyph_recog_text": "LEXUS", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265943.jpg", "caption": "a man in a blue shirt and sunglasses riding a small motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265964.jpg", "caption": "a group of people sitting at a table outside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397058.jpg", "caption": "a man holding two cell phones with the olympic logo on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528131.jpg", "caption": "surf lesson rental and repairs", "annotations": [{"polygon": [[195, 136], [191, 160], [186, 174], [117, 175], [121, 136], [136, 125], [166, 131]], "text": "SURF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SURF", "recog_valid": true, "glyph_recog_text": "SURF", "glyph_recog_ld": 1.0}, {"polygon": [[112, 204], [109, 237], [165, 242], [198, 237], [199, 205]], "text": "LESSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LESSON", "recog_valid": true, "glyph_recog_text": "LESSON", "glyph_recog_ld": 1.0}, {"polygon": [[108, 242], [107, 278], [201, 284], [198, 252]], "text": "RENTAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REN TAL", "recog_valid": false, "glyph_recog_text": "RENTAL", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[105, 303], [103, 335], [190, 341], [193, 335], [191, 315]], "text": "REPAIRS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REPAIRS", "recog_valid": true, "glyph_recog_text": "REPAIRS", "glyph_recog_ld": 1.0}, {"polygon": [[125, 406], [123, 436], [242, 438], [241, 408]], "text": "SmileBella", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SmleBelo", "recog_valid": false, "glyph_recog_text": "SmileBella", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134914.jpg", "caption": "a man standing on a ladder next to a plane", "annotations": [{"polygon": [[328, 124], [328, 123], [437, 134], [467, 139], [466, 200], [443, 199], [388, 193], [340, 186], [322, 181]], "text": "PRUDDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PRUODE", "recog_valid": false, "glyph_recog_text": "PRUDDE", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397064.jpg", "caption": "a woman in a pink outfit is playing tennis", "annotations": [{"polygon": [[0, 186], [86, 177], [90, 217], [0, 233]], "text": "Morgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Morgan", "recog_valid": true, "glyph_recog_text": "Morgan", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528148.jpg", "caption": "a stop sign with a tree in front of it", "annotations": [{"polygon": [[192, 176], [307, 163], [309, 198], [195, 208]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003870.jpg", "caption": "three women in red shirts holding plates of food", "annotations": [{"polygon": [[128, 242], [113, 274], [150, 272], [139, 241]], "text": "AUCHLAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": ".", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003877.jpg", "caption": "a living room with a television, a table, and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266038.jpg", "caption": "a cat is sleeping in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003897.jpg", "caption": "a giraffe standing in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003920.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528224.jpg", "caption": "a baseball game is in progress", "annotations": [{"polygon": [[255, 318], [270, 332], [293, 317], [279, 300]], "text": "28", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "28", "recog_valid": true, "glyph_recog_text": "28", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003941.jpg", "caption": "a street sign with a black and white lettering", "annotations": [{"polygon": [[178, 265], [354, 193], [345, 279], [180, 338]], "text": "RAMSAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAMSAY", "recog_valid": true, "glyph_recog_text": "RAMSAY", "glyph_recog_ld": 1.0}, {"polygon": [[391, 180], [393, 222], [360, 234], [362, 191]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "0H", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266108.jpg", "caption": "a group of motorcycles on a highway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397186.jpg", "caption": "a man throwing a frisbee in a field of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003988.jpg", "caption": "a woman on a bike talking on the phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397210.jpg", "caption": "a crowd of people walking down a street under a bridge", "annotations": [{"polygon": [[84, 209], [80, 245], [224, 240], [227, 212]], "text": "CAMDN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAMDN", "recog_valid": true, "glyph_recog_text": "CAMDN", "glyph_recog_ld": 1.0}, {"polygon": [[242, 206], [241, 244], [295, 236], [309, 229], [339, 216], [339, 211]], "text": "LOCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Loor", "recog_valid": false, "glyph_recog_text": "LOCK", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135082.jpg", "caption": "a group of people playing baseball on a field", "annotations": [{"polygon": [[43, 367], [40, 409], [88, 410], [89, 365]], "text": "88", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "88", "recog_valid": true, "glyph_recog_text": "88", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528299.jpg", "caption": "a banana, a measuring cup and a banana", "annotations": [{"polygon": [[248, 201], [248, 201], [247, 229], [280, 251], [301, 228]], "text": "King", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Yinq", "recog_valid": false, "glyph_recog_text": "King", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135086.jpg", "caption": "a yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004032.jpg", "caption": "a row of yellow school buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266217.jpg", "caption": "a little girl is looking at a kite flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266228.jpg", "caption": "a boat is docked in the water with birds flying around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397315.jpg", "caption": "a computer monitor, a book, and a dvd", "annotations": [{"polygon": [[82, 86], [77, 130], [224, 209], [224, 162]], "text": "ORDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORDER", "recog_valid": true, "glyph_recog_text": "ORDER", "glyph_recog_ld": 1.0}, {"polygon": [[190, 83], [181, 127], [83, 78], [87, 34]], "text": "LAW", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LAN", "recog_valid": false, "glyph_recog_text": "LAW", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[115, 164], [113, 192], [42, 150], [45, 123]], "text": "SPECIAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SPECIAL", "recog_valid": true, "glyph_recog_text": "SPECIAL", "glyph_recog_ld": 1.0}, {"polygon": [[117, 165], [115, 193], [195, 242], [200, 212]], "text": "VICTIMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VICTIMS", "recog_valid": true, "glyph_recog_text": "VICTIMS", "glyph_recog_ld": 1.0}, {"polygon": [[204, 216], [200, 245], [255, 278], [260, 247]], "text": "UNIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNIT", "recog_valid": true, "glyph_recog_text": "UNIT", "glyph_recog_ld": 1.0}, {"polygon": [[293, 162], [294, 182], [425, 166], [425, 149]], "text": "MARGARET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARGARET", "recog_valid": true, "glyph_recog_text": "MARGARET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135206.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[265, 144], [266, 172], [330, 170], [328, 142]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[337, 142], [339, 170], [406, 169], [403, 140]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004139.jpg", "caption": "a woman in a pink dress is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528432.jpg", "caption": "a group of baseball bats sitting on a table", "annotations": [{"polygon": [[183, 356], [182, 365], [202, 381], [227, 393], [229, 377]], "text": "wilson", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "w", "recog_valid": false, "glyph_recog_text": "wilson", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135229.jpg", "caption": "a woman in a blue top and blue scarf posing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397378.jpg", "caption": "a green double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004172.jpg", "caption": "a group of men playing a game of handball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135258.jpg", "caption": "a man is doing a trick on a snowboard", "annotations": [{"polygon": [[44, 303], [74, 172], [81, 174], [54, 295]], "text": "GUAHREA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "VSJHAUO", "recog_valid": false, "glyph_recog_text": "vya k y at", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266334.jpg", "caption": "a stop sign and a cement plant", "annotations": [{"polygon": [[288, 58], [373, 42], [378, 48], [380, 55], [380, 59], [377, 64], [375, 66], [372, 68], [370, 68], [370, 83], [291, 97], [287, 94], [283, 86], [282, 73], [283, 61]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004201.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266353.jpg", "caption": "a boat on a trailer parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266383.jpg", "caption": "a bald man in a suit and tie sitting in front of a bookcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397455.jpg", "caption": "a baseball player holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266386.jpg", "caption": "a sign for the westminster tube station", "annotations": [{"polygon": [[335, 240], [338, 255], [426, 191], [422, 176]], "text": "UNDERGROUND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNDERGROUND", "recog_valid": true, "glyph_recog_text": "UNDERGROUND", "glyph_recog_ld": 1.0}, {"polygon": [[289, 447], [296, 471], [413, 413], [415, 388], [387, 399], [353, 412]], "text": "Westminister", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Westminster", "recog_valid": false, "glyph_recog_text": "Westminister", "glyph_recog_ld": 0.9166667361110532}, {"polygon": [[321, 463], [321, 483], [358, 467], [363, 472], [369, 449], [340, 461], [337, 454]], "text": "Subway", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Subway", "recog_valid": true, "glyph_recog_text": "Subway", "glyph_recog_ld": 1.0}, {"polygon": [[286, 499], [321, 484], [316, 465], [284, 483]], "text": "Public", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Public", "recog_valid": true, "glyph_recog_text": "Public", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004245.jpg", "caption": "a stop sign with a bloody hand on it", "annotations": [{"polygon": [[162, 214], [157, 281], [321, 269], [326, 199]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135322.jpg", "caption": "a cell phone with a necklace on it", "annotations": [{"polygon": [[215, 161], [274, 187], [267, 196], [208, 172]], "text": "SYNNEX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0SYNNEX", "recog_valid": false, "glyph_recog_text": "SYNNEX", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397467.jpg", "caption": "a woman is buying ice cream from a food truck", "annotations": [{"polygon": [[136, 195], [137, 228], [188, 233], [194, 216], [194, 204], [150, 196]], "text": "Scoop", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Scoop", "recog_valid": true, "glyph_recog_text": "Scoop", "glyph_recog_ld": 1.0}, {"polygon": [[198, 123], [198, 149], [219, 148], [239, 136], [239, 115]], "text": "Crafted", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cnafted", "recog_valid": false, "glyph_recog_text": "Crated", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[245, 117], [244, 136], [257, 134], [292, 128], [293, 101], [286, 104], [283, 110]], "text": "natural", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "natunal", "recog_valid": false, "glyph_recog_text": "natural", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135327.jpg", "caption": "a man with dreads holding a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397486.jpg", "caption": "a knife sitting on a cutting board", "annotations": [{"polygon": [[102, 392], [102, 401], [192, 379], [192, 368]], "text": "Handcrafted", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Handerafted", "recog_valid": false, "glyph_recog_text": "Hagdecaized", "glyph_recog_ld": 0.6363639669418482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135344.jpg", "caption": "a woman with a backpack and a man with a backpack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135345.jpg", "caption": "a bus driving down a street with smoke coming out of the bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528563.jpg", "caption": "a woman and a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135348.jpg", "caption": "a man is standing next to a parking meter", "annotations": [{"polygon": [[341, 15], [355, 70], [426, 13], [419, 0], [363, 1]], "text": "BUCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BM", "recog_valid": false, "glyph_recog_text": "BUCH", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004289.jpg", "caption": "a phone on the side of a building in a city", "annotations": [{"polygon": [[188, 300], [89, 511], [302, 509], [261, 294]], "text": "1504", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "三", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266436.jpg", "caption": "two slices of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135367.jpg", "caption": "two giraffes are standing in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397511.jpg", "caption": "a bus is parked at a bus stop with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266455.jpg", "caption": "a train is parked on the tracks next to a building", "annotations": [{"polygon": [[83, 215], [83, 249], [181, 248], [182, 220]], "text": "Canada", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Canada", "recog_valid": true, "glyph_recog_text": "Canada", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135392.jpg", "caption": "a man standing in front of a mirror in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004322.jpg", "caption": "a blue train traveling down a track through a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004331.jpg", "caption": "a man and a woman standing next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528624.jpg", "caption": "two young boys standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135412.jpg", "caption": "a man in a yellow vest is standing in the middle of a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528654.jpg", "caption": "a living room with a table and chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266515.jpg", "caption": "a woman feeding a giraffe with a bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266518.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266520.jpg", "caption": "a man is feeding a cow in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135459.jpg", "caption": "a red bench sitting in front of a stone building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397605.jpg", "caption": "a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135463.jpg", "caption": "a bus driving down a street with people standing on the side", "annotations": [{"polygon": [[128, 395], [258, 382], [278, 400], [155, 415]], "text": "OW", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ON", "recog_valid": false, "glyph_recog_text": "ow", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528712.jpg", "caption": "a group of people standing on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135502.jpg", "caption": "a street sign with a no parking sign and a traffic light", "annotations": [{"polygon": [[274, 113], [381, 122], [382, 146], [279, 139], [271, 136], [271, 131], [275, 128], [274, 125], [271, 121], [271, 116]], "text": "STATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STATE", "recog_valid": true, "glyph_recog_text": "STATE", "glyph_recog_ld": 1.0}, {"polygon": [[422, 252], [483, 275], [486, 292], [423, 270]], "text": "STANDING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STANDING", "recog_valid": true, "glyph_recog_text": "STANDING", "glyph_recog_ld": 1.0}, {"polygon": [[431, 284], [484, 304], [485, 320], [429, 303]], "text": "ANYTIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ANYT!ME", "recog_valid": false, "glyph_recog_text": "ANYTIME", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397664.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266597.jpg", "caption": "a train on the tracks", "annotations": [{"polygon": [[44, 328], [48, 360], [12, 366], [4, 329]], "text": "B>C002", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "102", "recog_valid": false, "glyph_recog_text": "ecn2", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[54, 357], [49, 404], [9, 403], [2, 369]], "text": "STOMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Seror", "recog_valid": false, "glyph_recog_text": "STOMF", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397673.jpg", "caption": "united airlines boeing 757-200", "annotations": [{"polygon": [[246, 244], [298, 254], [315, 224], [258, 221]], "text": "UNITED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNTEO", "recog_valid": false, "glyph_recog_text": "UNITED", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004462.jpg", "caption": "a road with construction cones and traffic signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004471.jpg", "caption": "a parking meter on the side of a street", "annotations": [{"polygon": [[131, 291], [178, 272], [175, 261], [128, 278]], "text": "B.6414.wk", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "B6414-WK", "recog_valid": false, "glyph_recog_text": "B844wk", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266618.jpg", "caption": "a man and a woman riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004478.jpg", "caption": "an all nippon airways boeing 787-9 at tokyo airport", "annotations": [{"polygon": [[89, 223], [111, 241], [104, 255], [77, 230]], "text": "ANA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PNA", "recog_valid": false, "glyph_recog_text": "ANA", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004481.jpg", "caption": "a street sign is on a pole", "annotations": [{"polygon": [[278, 81], [278, 81], [282, 120], [156, 142], [150, 97]], "text": "NEWTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "NEWTON", "recog_valid": true, "glyph_recog_text": "NEWTON", "glyph_recog_ld": 1.0}, {"polygon": [[205, 164], [301, 200], [308, 160], [215, 135]], "text": "E. BRADOOCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRAOOOCK", "recog_valid": false, "glyph_recog_text": "E.BRADOOCK", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397701.jpg", "caption": "a woman riding a motorbike with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266646.jpg", "caption": "a street sign with the name shadder on it", "annotations": [{"polygon": [[299, 146], [486, 162], [489, 191], [294, 176]], "text": "SHRADER", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SHRADER", "recog_valid": true, "glyph_recog_text": "SHRADER", "glyph_recog_ld": 1.0}, {"polygon": [[367, 100], [367, 100], [412, 46], [436, 47], [432, 58], [379, 113], [368, 113]], "text": "RNASSU", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "NASSU", "recog_valid": false, "glyph_recog_text": "RNASSU", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135576.jpg", "caption": "a plate with a biscuit and eggs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266652.jpg", "caption": "a street sign that says montgomery st", "annotations": [{"polygon": [[166, 83], [165, 109], [230, 109], [230, 112], [238, 113], [241, 109], [247, 108], [314, 105], [315, 109], [321, 109], [325, 83]], "text": "Montgomery", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Montgomery", "recog_valid": true, "glyph_recog_text": "Montgomery", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528800.jpg", "caption": "a clock on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135589.jpg", "caption": "a man in a suit and tie standing on a subway train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135608.jpg", "caption": "a skateboarder is doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397782.jpg", "caption": "a teddy bear on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004575.jpg", "caption": "a desk with two laptops and a computer monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397792.jpg", "caption": "a stuffed animal sitting on a desk", "annotations": [{"polygon": [[275, 301], [266, 312], [294, 331], [299, 325], [277, 302]], "text": "love", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ove", "recog_valid": false, "glyph_recog_text": "kwe", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004595.jpg", "caption": "a bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397813.jpg", "caption": "a table with bowls of food and cups of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397815.jpg", "caption": "a group of people standing in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004616.jpg", "caption": "two children sitting on a stage with balloons", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135694.jpg", "caption": "two pizzas on a stove top with a pan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528915.jpg", "caption": "a kitchen with a sink, counter, and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528929.jpg", "caption": "a red classic truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397859.jpg", "caption": "a man riding a skateboard in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004662.jpg", "caption": "a baseball player is at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135735.jpg", "caption": "a baby wearing a hat and tie laying on a white background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135741.jpg", "caption": "a black and white photo of urinals in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528960.jpg", "caption": "a photograph of a person carrying a red suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004673.jpg", "caption": "a hamburger and fries are on a paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266827.jpg", "caption": "a motorcycle parked in front of a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004702.jpg", "caption": "a group of men in green and white playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135775.jpg", "caption": "a stop sign on a road with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004706.jpg", "caption": "a giraffe standing in a field with a sign", "annotations": [{"polygon": [[377, 400], [498, 395], [505, 422], [385, 427]], "text": "Giraffe", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Giraffe", "recog_valid": true, "glyph_recog_text": "fGiraffe", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[-2, 402], [511, 382], [513, 448], [0, 449]], "text": "Giraffe", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "e lilclraff.", "recog_valid": false, "glyph_recog_text": "Giraff e", "glyph_recog_ld": 0.33333388888842586}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397925.jpg", "caption": "a mirror reflecting a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135794.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[314, 269], [360, 270], [362, 308], [313, 309]], "text": "47", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "47", "recog_valid": true, "glyph_recog_text": "47", "glyph_recog_ld": 1.0}, {"polygon": [[268, 239], [350, 216], [355, 259], [276, 284]], "text": "Dodgers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dodgeng", "recog_valid": false, "glyph_recog_text": "Dodgers", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529012.jpg", "caption": "a banana sitting on a desk next to a calculator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266880.jpg", "caption": "two boys sitting on a bed with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135815.jpg", "caption": "a stuffed monkey and a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529055.jpg", "caption": "a person holding an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266912.jpg", "caption": "a refrigerator with a white board on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397986.jpg", "caption": "a british airways plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266914.jpg", "caption": "a parking meter is on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529087.jpg", "caption": "a kitchen with a center island and bar stools", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135875.jpg", "caption": "a pile of books and a bag of chips", "annotations": [{"polygon": [[98, 345], [56, 351], [71, 447], [125, 448]], "text": "VING DE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "VINGOF", "recog_valid": false, "glyph_recog_text": ">-zo", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[235, 193], [231, 199], [247, 207], [258, 214], [265, 223], [273, 233], [278, 236], [284, 236], [284, 234], [278, 232], [270, 221], [259, 209]], "text": "COMBOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "combon", "recog_valid": false, "glyph_recog_text": "com品部品", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[229, 172], [221, 181], [241, 191], [278, 228], [284, 219], [260, 196]], "text": "ZONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZONE", "recog_valid": true, "glyph_recog_text": "ZOHE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266959.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266961.jpg", "caption": "a green fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004820.jpg", "caption": "a cat laying on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398040.jpg", "caption": "a large jet airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004827.jpg", "caption": "a cake with a dolphin and the name amelie", "annotations": [{"polygon": [[253, 242], [228, 194], [286, 197], [281, 246], [255, 244]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 1.0}, {"polygon": [[115, 282], [146, 309], [175, 325], [231, 339], [271, 338], [320, 326], [364, 311], [408, 277], [379, 239], [341, 249], [306, 242], [291, 242], [247, 270], [196, 253], [195, 225], [186, 225], [114, 282]], "text": "Amelie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dmene", "recog_valid": false, "glyph_recog_text": "Amelie", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529117.jpg", "caption": "a herd of elephants walking through a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266976.jpg", "caption": "a kitchen with a sink, a refrigerator and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266990.jpg", "caption": "a baby sitting on a bed with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398070.jpg", "caption": "a yellow fire hydrant sitting in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398083.jpg", "caption": "a young boy running to first base", "annotations": [{"polygon": [[307, 156], [301, 183], [331, 183], [328, 150], [322, 150]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267014.jpg", "caption": "a woman eating a donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267022.jpg", "caption": "yellow lawn chairs and umbrellas on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135955.jpg", "caption": "a train on the tracks near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267028.jpg", "caption": "a group of people sitting on bunk beds in a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135959.jpg", "caption": "a baseball player throwing a ball from the mound", "annotations": [{"polygon": [[266, 214], [276, 173], [332, 186], [318, 231]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004888.jpg", "caption": "a small orange and white boat traveling on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267042.jpg", "caption": "several laptops are sitting on a table with a person standing next to them", "annotations": [{"polygon": [[426, 368], [402, 410], [378, 403], [406, 364]], "text": "TOSHIBA", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "TOSHIBA", "recog_valid": true, "glyph_recog_text": "TOGHIBA", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529188.jpg", "caption": "a black cat laying in a bathroom sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004904.jpg", "caption": "a kitchen with a red and white rug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267059.jpg", "caption": "a woman brushing her teeth in a bathroom", "annotations": [{"polygon": [[95, 388], [95, 388], [138, 386], [139, 404], [166, 403], [172, 399], [188, 408], [191, 411], [190, 440], [124, 436], [111, 436], [84, 437]], "text": "Meti", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Meti", "recog_valid": true, "glyph_recog_text": "Meti", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267064.jpg", "caption": "a street with a traffic light and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004920.jpg", "caption": "a group of airplanes on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267082.jpg", "caption": "a vase with sunflowers in it", "annotations": [{"polygon": [[207, 41], [207, 41], [228, 38], [242, 48], [237, 76], [224, 82], [197, 71]], "text": "ors", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ols", "recog_valid": false, "glyph_recog_text": "g", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529227.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267095.jpg", "caption": "a dog in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136026.jpg", "caption": "a man standing next to a large truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398175.jpg", "caption": "a man in a suit and tie speaking at a podium", "annotations": [{"polygon": [[331, 154], [341, 159], [372, 153], [377, 148], [385, 152], [411, 148], [417, 166], [336, 178], [323, 177]], "text": "AMERICAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Amcrican", "recog_valid": false, "glyph_recog_text": "AMERICAN", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004963.jpg", "caption": "a cat sitting in front of a television watching a fish", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267111.jpg", "caption": "a group of people standing around a luggage carousel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529258.jpg", "caption": "a man walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136043.jpg", "caption": "a bus driving down a city street with cars and bikes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398229.jpg", "caption": "a young boy playing with a toy truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529303.jpg", "caption": "a man and a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136088.jpg", "caption": "starbucks coffee cup with you on it", "annotations": [{"polygon": [[301, 321], [303, 316], [309, 306], [312, 303], [316, 299], [322, 297], [331, 294], [337, 294], [351, 295], [361, 298], [369, 302], [375, 308], [379, 314], [382, 319], [382, 324], [373, 329], [371, 322], [366, 315], [363, 311], [352, 307], [342, 305], [333, 305], [324, 308], [316, 315], [311, 327]], "text": "STARBUCKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": false, "glyph_recog_text": "STARBUCKS", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529322.jpg", "caption": "a man wearing a suit and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267192.jpg", "caption": "a train with a red and white engine on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529340.jpg", "caption": "the cover of 7 sky magazine with a man walking on the beach", "annotations": [{"polygon": [[70, 48], [72, 146], [177, 150], [178, 48]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "7", "recog_valid": true, "glyph_recog_text": "7", "glyph_recog_ld": 1.0}, {"polygon": [[183, 23], [183, 106], [441, 105], [440, 26]], "text": "sky", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "sky", "recog_valid": true, "glyph_recog_text": "s k y", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267199.jpg", "caption": "a man sitting on a chair with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267203.jpg", "caption": "a train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529352.jpg", "caption": "a large auditorium filled with people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136142.jpg", "caption": "a table with a lot of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136154.jpg", "caption": "a view of the big ben clock tower and the palace of westminster", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267229.jpg", "caption": "a street sign on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136185.jpg", "caption": "a bookcase with books, a record player, and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136186.jpg", "caption": "cows laying in the grass in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529427.jpg", "caption": "a tennis court", "annotations": [{"polygon": [[361, 122], [361, 133], [441, 112], [440, 99]], "text": "FORMEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "OREMAN", "recog_valid": false, "glyph_recog_text": "FORMEN", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005140.jpg", "caption": "a laptop computer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005139.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398367.jpg", "caption": "a man is standing next to a cart with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136235.jpg", "caption": "a large airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005174.jpg", "caption": "a red double decker bus parked in front of a palm tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529470.jpg", "caption": "a group of people standing in front of a school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398406.jpg", "caption": "a man in a suit and tie standing in front of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005198.jpg", "caption": "a family riding on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398426.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398434.jpg", "caption": "a cat sitting on a table with plates", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398442.jpg", "caption": "two men loading luggage onto a conveyor belt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267372.jpg", "caption": "a bus driving down a street with a man walking on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529545.jpg", "caption": "two pictures of a street with a horse drawn carriage and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267416.jpg", "caption": "a stop sign with a turtle on it", "annotations": [{"polygon": [[209, 375], [248, 371], [255, 416], [198, 419]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AL", "recog_valid": false, "glyph_recog_text": "ALL", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[337, 164], [340, 239], [354, 240], [353, 218], [381, 213], [392, 207], [397, 187], [392, 170], [380, 159]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136346.jpg", "caption": "a yellow bus driving down a street with buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529570.jpg", "caption": "a sign for tomasso's restaurant on the side of a building", "annotations": [{"polygon": [[92, 216], [109, 263], [123, 266], [239, 274], [312, 280], [320, 292], [332, 289], [335, 256], [329, 240], [314, 236], [244, 215], [181, 204], [150, 202], [115, 208]], "text": "Tomasino's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tomasuno3", "recog_valid": false, "glyph_recog_text": "Tomasino's", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[259, 285], [258, 303], [331, 323], [330, 304]], "text": "CELLAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CELLAR", "recog_valid": true, "glyph_recog_text": "CELLAR", "glyph_recog_ld": 1.0}, {"polygon": [[95, 274], [88, 304], [88, 306], [346, 368], [346, 337]], "text": "RISTORANTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RISTORANTE", "recog_valid": true, "glyph_recog_text": "RISTORANTE", "glyph_recog_ld": 1.0}, {"polygon": [[438, 307], [444, 341], [493, 377], [499, 370], [501, 345], [500, 287], [501, 252]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "F", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267435.jpg", "caption": "two men sitting at a table with a basket of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136369.jpg", "caption": "a pink van with a tent on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398527.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267457.jpg", "caption": "a baseball player standing on a mound of dirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267463.jpg", "caption": "a car is parked next to a stop sign", "annotations": [{"polygon": [[414, 167], [498, 157], [498, 157], [500, 170], [486, 190], [416, 197]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529613.jpg", "caption": "a group of people standing around a table with a pizza", "annotations": [{"polygon": [[326, 146], [326, 146], [414, 159], [409, 186], [322, 172]], "text": "ILLINI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ILLINI", "recog_valid": true, "glyph_recog_text": "ILLINI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005340.jpg", "caption": "a blue and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136415.jpg", "caption": "a bicycle is parked in front of a pile of shoes", "annotations": [{"polygon": [[184, 230], [247, 285], [253, 280], [195, 228]], "text": "AUTHOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AUTHOR", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529645.jpg", "caption": "a cow with a collar on is standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529661.jpg", "caption": "a group of people in a classroom with a projector screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136461.jpg", "caption": "a bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136468.jpg", "caption": "a woman is smiling while talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267548.jpg", "caption": "two men in yellow jackets and red vests are holding a motorcycle", "annotations": [{"polygon": [[120, 318], [133, 285], [184, 287], [170, 320]], "text": "SERV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SERV", "recog_valid": true, "glyph_recog_text": "SERV", "glyph_recog_ld": 1.0}, {"polygon": [[139, 378], [212, 336], [217, 344], [144, 382]], "text": "OOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "www.serv.org.uk", "recog_valid": false, "glyph_recog_text": "及", "glyph_recog_ld": 6.666662222265529e-07}, {"polygon": [[276, 136], [276, 143], [281, 143], [288, 145], [296, 149], [302, 157], [306, 164], [307, 170], [314, 170], [312, 162], [308, 156], [304, 150], [296, 143], [285, 138]], "text": "I", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OILENC", "recog_valid": false, "glyph_recog_text": "i", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[253, 168], [256, 186], [263, 199], [299, 190], [300, 164], [295, 155], [289, 154], [284, 152], [266, 152], [258, 156], [254, 162]], "text": "ADCOM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sha", "recog_valid": false, "glyph_recog_text": "AOCOM", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005459.jpg", "caption": "a man standing on a tennis court holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267604.jpg", "caption": "a bus driving down a street with cars and trucks behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136539.jpg", "caption": "a refrigerator with a sign has texts", "annotations": [{"polygon": [[344, 84], [343, 118], [388, 119], [388, 85]], "text": "BLIP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "blip", "recog_valid": false, "glyph_recog_text": "BLIP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005483.jpg", "caption": "a view of a store with a counter and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529772.jpg", "caption": "a baseball player is holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136559.jpg", "caption": "a cat laying on a desk next to a computer keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136563.jpg", "caption": "a group of buses parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529788.jpg", "caption": "a large airplane on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005500.jpg", "caption": "a street sign with a street name on it", "annotations": [{"polygon": [[180, 313], [188, 298], [163, 275], [153, 290]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136575.jpg", "caption": "a tray of food on an airplane with a bottle of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267647.jpg", "caption": "a birthday cake with a knife in it", "annotations": [{"polygon": [[215, 124], [244, 125], [290, 150], [272, 185], [188, 147]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[159, 136], [169, 138], [229, 157], [260, 178], [232, 211], [134, 156]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "fieklaf", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[153, 159], [217, 198], [204, 225], [113, 206]], "text": "Joan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Joan", "recog_valid": true, "glyph_recog_text": "Joan", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005505.jpg", "caption": "a busy city street with people walking and shopping", "annotations": [{"polygon": [[21, 169], [22, 187], [116, 202], [117, 186]], "text": "TIMPSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TIMPSON", "recog_valid": true, "glyph_recog_text": "TIMPSON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267653.jpg", "caption": "a striped cat laying on a blue blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398729.jpg", "caption": "a living room with a couch, coffee table and television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529802.jpg", "caption": "baskets of apples are displayed at a market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529804.jpg", "caption": "a birthday cake in a box", "annotations": [{"polygon": [[89, 267], [107, 272], [132, 291], [160, 293], [167, 296], [173, 305], [157, 338], [144, 333], [112, 329], [94, 321], [70, 291], [60, 273], [63, 267]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "马", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[187, 296], [202, 283], [214, 274], [225, 257], [240, 229], [241, 201], [231, 176], [251, 175], [268, 193], [268, 232], [257, 262], [237, 293], [196, 320]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267661.jpg", "caption": "a man wearing a hat and a tie sitting on a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529827.jpg", "caption": "a book on a bed with a pillow", "annotations": [{"polygon": [[252, 333], [246, 339], [284, 366], [287, 360]], "text": "AVENUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AVENUE", "recog_valid": true, "glyph_recog_text": "NYENUS", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529829.jpg", "caption": "a man is walking around a market with a bunch of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529842.jpg", "caption": "a large white building with a clock on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005557.jpg", "caption": "a large group of airplanes on the runway", "annotations": [{"polygon": [[162, 218], [178, 240], [190, 252], [200, 250], [190, 236], [183, 229], [172, 214], [166, 215]], "text": "CANJET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANJET", "recog_valid": true, "glyph_recog_text": "CANET", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136641.jpg", "caption": "a baseball player swinging a bat on a field", "annotations": [{"polygon": [[195, 233], [192, 297], [12, 300], [9, 238]], "text": "GER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GER", "recog_valid": true, "glyph_recog_text": "GER", "glyph_recog_ld": 1.0}, {"polygon": [[392, 233], [511, 234], [511, 292], [392, 295]], "text": "BA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BA", "recog_valid": true, "glyph_recog_text": "BA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136652.jpg", "caption": "a road work ahead sign on a pole", "annotations": [{"polygon": [[216, 168], [214, 196], [293, 175], [298, 157], [293, 141], [238, 157], [218, 167]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}, {"polygon": [[207, 220], [210, 252], [305, 232], [301, 192], [252, 203], [207, 219]], "text": "WORK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WORK", "recog_valid": true, "glyph_recog_text": "WORK", "glyph_recog_ld": 1.0}, {"polygon": [[204, 269], [204, 301], [204, 305], [302, 297], [309, 286], [309, 259], [204, 268]], "text": "AHEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHEAD", "recog_valid": true, "glyph_recog_text": "AHEAD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267731.jpg", "caption": "a statue of a cow with a frisbee in its mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267732.jpg", "caption": "a little girl holding a frisbee", "annotations": [{"polygon": [[159, 297], [227, 298], [227, 267], [164, 267]], "text": "little", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ittule", "recog_valid": false, "glyph_recog_text": "little", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[151, 328], [233, 330], [233, 289], [149, 293]], "text": "love", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ove", "recog_valid": false, "glyph_recog_text": "love", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[179, 355], [180, 324], [246, 325], [243, 363], [222, 361], [222, 354]], "text": "bug", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "bug", "recog_valid": true, "glyph_recog_text": "bug", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529886.jpg", "caption": "a collage of pictures of a hotel room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005608.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136687.jpg", "caption": "a man laying on a couch with a remote control in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267762.jpg", "caption": "an old red fire truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136699.jpg", "caption": "a purse with many items in it", "annotations": [{"polygon": [[348, 328], [347, 319], [354, 309], [369, 292], [374, 286], [390, 285], [396, 288], [397, 289], [396, 296], [387, 308], [377, 317], [370, 329]], "text": "Belle Jow", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pellc", "recog_valid": false, "glyph_recog_text": "Belle Jtow", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136704.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267780.jpg", "caption": "a heart shaped pizza on a tray", "annotations": [{"polygon": [[80, 378], [83, 429], [149, 428], [145, 377]], "text": "z", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[221, 426], [219, 374], [289, 374], [287, 426]], "text": "Z", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Z", "recog_valid": true, "glyph_recog_text": "Z", "glyph_recog_ld": 1.0}, {"polygon": [[355, 420], [356, 372], [423, 368], [425, 417]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398863.jpg", "caption": "a young boy holding a baseball bat in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267794.jpg", "caption": "two cats laying on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136721.jpg", "caption": "a woman is serving a tennis ball on a court", "annotations": [{"polygon": [[284, 32], [284, 32], [341, 20], [342, 25], [342, 44], [287, 56], [284, 52]], "text": "0:20", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "0:20", "recog_valid": true, "glyph_recog_text": "0:20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005684.jpg", "caption": "a bus driving down a street with houses on either side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267830.jpg", "caption": "a man holding a snowboard on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005689.jpg", "caption": "a woman is standing in a store with a lot of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136764.jpg", "caption": "a man sitting in front of a bedroom with a bed and a wardrobe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267848.jpg", "caption": "a man in a black and yellow shirt is about to throw a rugby ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530002.jpg", "caption": "a computer desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005736.jpg", "caption": "a sailboat in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267880.jpg", "caption": "a pink suitcase sitting on the floor next to a sign that says astro skating center", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267927.jpg", "caption": "a woman riding a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399016.jpg", "caption": "a yellow bird sitting on a branch with leaves in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530097.jpg", "caption": "a laptop computer sitting on a desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005809.jpg", "caption": "a large clock on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530124.jpg", "caption": "a young boy in a green and white uniform", "annotations": [{"polygon": [[321, 289], [325, 286], [363, 336], [358, 341]], "text": "EASTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FARTAN", "recog_valid": false, "glyph_recog_text": ":jolop", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530132.jpg", "caption": "two buses parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136928.jpg", "caption": "three buses parked in a row with a man standing in front of them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136932.jpg", "caption": "three men in bavarian costumes eating hot dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399078.jpg", "caption": "a person walking in the rain with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268023.jpg", "caption": "a red bus parked next to a white bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399101.jpg", "caption": "a man wearing a tie and a black belt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136963.jpg", "caption": "a person wearing a shirt that says i'm a character", "annotations": [{"polygon": [[313, 362], [311, 388], [387, 395], [462, 398], [460, 379]], "text": "charater", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "character", "recog_valid": false, "glyph_recog_text": "charater", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268036.jpg", "caption": "a group of soldiers on a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136977.jpg", "caption": "a street sign with a yellow caution sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136979.jpg", "caption": "a woman on a bike and a man on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005906.jpg", "caption": "a television set in a living room with a person on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530197.jpg", "caption": "a train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530201.jpg", "caption": "a kitchen sink with a plant in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005915.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005916.jpg", "caption": "a snowboarder doing a trick in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530202.jpg", "caption": "a street sign and a traffic light on a pole", "annotations": [{"polygon": [[178, 140], [288, 126], [285, 153], [173, 166]], "text": "HUDSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HUDSON", "recog_valid": true, "glyph_recog_text": "HUDSON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136992.jpg", "caption": "a male afl player in yellow and black uniform kicking a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268065.jpg", "caption": "a man riding a motorcycle with a cage on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399142.jpg", "caption": "the seat on a motorcycle is black and has a red seat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399148.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268081.jpg", "caption": "an old black and white photo of the sydney harbour bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268102.jpg", "caption": "a cat sitting on a desk watching a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530282.jpg", "caption": "a yellow truck parked on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399214.jpg", "caption": "a skateboarder doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137075.jpg", "caption": "a red and white sign is on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268147.jpg", "caption": "a dog laying on the ground next to a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006010.jpg", "caption": "an old black steam engine train is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268159.jpg", "caption": "a man standing in a kitchen holding a piece of paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137094.jpg", "caption": "a stop sign and a street sign", "annotations": [{"polygon": [[93, 176], [90, 211], [128, 225], [132, 220], [132, 194], [93, 176]], "text": "REDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "REN", "recog_valid": false, "glyph_recog_text": "REDS", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[84, 232], [82, 275], [276, 269], [277, 218], [83, 233]], "text": "ARISTOCRATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARISTOCRATE", "recog_valid": true, "glyph_recog_text": "ARISTOCRATE", "glyph_recog_ld": 1.0}, {"polygon": [[292, 217], [292, 267], [415, 263], [414, 207], [292, 216]], "text": "PLACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PLACE", "recog_valid": true, "glyph_recog_text": "PLACE", "glyph_recog_ld": 1.0}, {"polygon": [[0, 414], [0, 447], [116, 448], [122, 430], [115, 412], [63, 410], [1, 414]], "text": "TOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "TOD", "recog_valid": false, "glyph_recog_text": "TOP", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137096.jpg", "caption": "a remote control and a book on a table", "annotations": [{"polygon": [[113, 263], [113, 263], [91, 293], [84, 289], [89, 280], [106, 259], [109, 258], [114, 263]], "text": "KY169JG", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DI691AN", "recog_valid": false, "glyph_recog_text": "消R", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137127.jpg", "caption": "a large airplane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006057.jpg", "caption": "a woman in a yellow shirt and black skirt playing tennis", "annotations": [{"polygon": [[71, 348], [324, 339], [327, 387], [74, 387]], "text": "LEXI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEX", "recog_valid": false, "glyph_recog_text": "LEXI", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137134.jpg", "caption": "a person in the air on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006075.jpg", "caption": "a train station with a clock and a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530386.jpg", "caption": "a table with a bunch of fruit and a box of cereal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137178.jpg", "caption": "a baseball player is holding a bat", "annotations": [{"polygon": [[218, 252], [264, 252], [266, 301], [222, 300]], "text": "18", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "18", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268251.jpg", "caption": "a woman standing on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006107.jpg", "caption": "a baseball game is being played in front of a tori gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399325.jpg", "caption": "a bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399332.jpg", "caption": "a car window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268266.jpg", "caption": "a street sign with a traffic light and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268292.jpg", "caption": "a stove top with a pot and a kettle on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006148.jpg", "caption": "a bus is parked in a garage with a sign that says express bus", "annotations": [{"polygon": [[352, 257], [453, 276], [450, 301], [343, 294]], "text": "EXPRESSBUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EXPRESSBU", "recog_valid": false, "glyph_recog_text": "EXPRESSBUS", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137230.jpg", "caption": "a store selling fruit and vegetables on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268313.jpg", "caption": "a man riding a bike with a dog on his back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137243.jpg", "caption": "a flower pot with flowers on a window sill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530460.jpg", "caption": "three white cows standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006190.jpg", "caption": "a man sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399408.jpg", "caption": "a hot dog and fries in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268342.jpg", "caption": "a street light and traffic signs on a pole", "annotations": [{"polygon": [[143, 399], [148, 355], [188, 355], [186, 397]], "text": "mic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137273.jpg", "caption": "a banana and an apple with a sticker on it", "annotations": [{"polygon": [[328, 310], [358, 311], [359, 328], [358, 343], [359, 355], [359, 368], [359, 376], [359, 389], [359, 400], [330, 402], [328, 394], [328, 385], [329, 380], [329, 367], [330, 357], [329, 349], [329, 339], [328, 327], [327, 320]], "text": "ORGANIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORGANIC", "recog_valid": true, "glyph_recog_text": "oo_Swzr", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139326.jpg", "caption": "a vintage radio in a cabinet with a wooden door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139339.jpg", "caption": "a group of people playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139345.jpg", "caption": "a train traveling down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139389.jpg", "caption": "a blue car with a surfboard on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532605.jpg", "caption": "a man riding a skateboard down a road with a hill in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139390.jpg", "caption": "a white bus with a green and white design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532622.jpg", "caption": "a couple of people walking down a path", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401553.jpg", "caption": "a group of people riding skateboards on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532629.jpg", "caption": "a train car with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270497.jpg", "caption": "a street with a stop sign and a car driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401589.jpg", "caption": "a group of people standing around a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008373.jpg", "caption": "a large airplane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532668.jpg", "caption": "a large ship in the water with a city in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270523.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008385.jpg", "caption": "a train car with luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139492.jpg", "caption": "a toilet and a roll of toilet paper on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139496.jpg", "caption": "a do not enter sign is shown in the dark", "annotations": [{"polygon": [[102, 106], [103, 142], [148, 144], [150, 111]], "text": "DO", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "DO", "recog_valid": true, "glyph_recog_text": "DO", "glyph_recog_ld": 1.0}, {"polygon": [[173, 114], [173, 146], [173, 146], [238, 150], [239, 120]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOT", "recog_valid": true, "glyph_recog_text": "NOT", "glyph_recog_ld": 1.0}, {"polygon": [[118, 213], [122, 253], [237, 255], [231, 216]], "text": "ENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENTER", "recog_valid": true, "glyph_recog_text": "ENTER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532711.jpg", "caption": "a pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139523.jpg", "caption": "a man holding a remote control in his hand", "annotations": [{"polygon": [[299, 409], [287, 390], [298, 361], [325, 358], [342, 352], [354, 356], [354, 360], [349, 369], [332, 382]], "text": "Australia", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "Australia", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270596.jpg", "caption": "a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008453.jpg", "caption": "a view of the wing of an airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532744.jpg", "caption": "a traffic light on a street with cars and trucks", "annotations": [{"polygon": [[454, 316], [446, 363], [484, 363], [488, 350], [504, 348], [509, 307]], "text": "IP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270612.jpg", "caption": "a black and white photo of a motel sign", "annotations": [{"polygon": [[239, 36], [239, 216], [268, 216], [269, 38]], "text": "MOTEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ECHU.I", "recog_valid": false, "glyph_recog_text": "20-W-", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139538.jpg", "caption": "a group of people are flying kites in the field", "annotations": [{"polygon": [[195, 189], [178, 201], [181, 206], [193, 212], [200, 223], [202, 235], [207, 240], [224, 232], [220, 224], [218, 210], [211, 201], [197, 191]], "text": "198", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "19:", "recog_valid": false, "glyph_recog_text": "198", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532766.jpg", "caption": "a display of pizzas in plastic bags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139551.jpg", "caption": "a woman in blue and white tennis outfit playing tennis", "annotations": [{"polygon": [[190, 216], [189, 221], [207, 222], [225, 225], [239, 230], [249, 236], [250, 228], [248, 218], [240, 214], [237, 212], [227, 211], [223, 207], [212, 206]], "text": "DCSB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JC", "recog_valid": false, "glyph_recog_text": "DCSB", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532768.jpg", "caption": "a large white garbage truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532779.jpg", "caption": "a street with a lot of signs and traffic lights", "annotations": [{"polygon": [[288, 248], [290, 290], [318, 291], [317, 248]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139571.jpg", "caption": "a group of men in white uniforms cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139590.jpg", "caption": "a baseball game with a batter at bat and a catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139594.jpg", "caption": "two red motorcycles parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139595.jpg", "caption": "a woman is posing for a picture on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270686.jpg", "caption": "a large airplane with the word branair on it", "annotations": [{"polygon": [[110, 238], [117, 272], [367, 266], [363, 240]], "text": "BRANIEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "二RONIFE", "recog_valid": false, "glyph_recog_text": "BRANIEE", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008564.jpg", "caption": "a black and white photo of cars on the street", "annotations": [{"polygon": [[186, 201], [236, 228], [262, 218], [211, 191]], "text": "X-ING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WX", "recog_valid": false, "glyph_recog_text": "X-ING", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401780.jpg", "caption": "a man sitting on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532852.jpg", "caption": "a clock tower with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139645.jpg", "caption": "a young child brushing his teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401802.jpg", "caption": "a glass of champagne sitting on a table in front of a train", "annotations": [{"polygon": [[198, 169], [208, 124], [253, 121], [253, 132], [314, 130], [313, 138], [290, 151], [267, 150], [243, 145], [239, 167]], "text": "EUROSTAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CENOSTAE", "recog_valid": false, "glyph_recog_text": "EUROSTAR", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139679.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139686.jpg", "caption": "a man swinging a tennis racket at a ball", "annotations": [{"polygon": [[382, 284], [370, 290], [358, 304], [348, 319], [366, 324], [370, 336], [390, 332], [399, 341], [422, 335], [434, 340], [441, 334], [437, 324], [450, 300], [446, 296], [434, 301], [420, 300], [407, 306], [392, 319], [382, 316], [389, 302], [382, 298]], "text": "ra", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "hd", "recog_valid": false, "glyph_recog_text": "ra", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139712.jpg", "caption": "a bench sitting next to a train track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270789.jpg", "caption": "a street sign with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270796.jpg", "caption": "a grandfather clock in a room with a green curtain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270799.jpg", "caption": "a truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139729.jpg", "caption": "a parking meter on the side of a street", "annotations": [{"polygon": [[229, 16], [228, 52], [283, 54], [284, 20]], "text": "FURS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FURS", "recog_valid": true, "glyph_recog_text": "FURS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270809.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270812.jpg", "caption": "the city of bruges in belgium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532966.jpg", "caption": "two giraffes in a cage at the zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401921.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139780.jpg", "caption": "two people on bicycles on the beach with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532999.jpg", "caption": "a woman with a suitcase in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139787.jpg", "caption": "a red train is on the tracks next to a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139789.jpg", "caption": "a man carrying a wooden box with jewelry on it", "annotations": [{"polygon": [[299, 412], [307, 418], [309, 418], [312, 408], [311, 401], [316, 389], [323, 378], [333, 371], [336, 366], [334, 359], [327, 363], [318, 371], [316, 372], [309, 382], [304, 390], [300, 403]], "text": "ASUCIACION", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "KCACIACIMN", "recog_valid": false, "glyph_recog_text": "ARUCIACION", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401981.jpg", "caption": "a person is using a blender to make a smoothie", "annotations": [{"polygon": [[31, 165], [51, 159], [57, 156], [59, 161], [61, 178], [60, 195], [43, 202], [36, 203]], "text": "OF", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "Ou", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[25, 206], [21, 247], [43, 245], [69, 235], [71, 190], [49, 202]], "text": "26.2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Z6L", "recog_valid": false, "glyph_recog_text": "。", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533054.jpg", "caption": "a clock tower on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139839.jpg", "caption": "a view of a plane taking off from an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139836.jpg", "caption": "a giraffe eating from a person's hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401980.jpg", "caption": "a man and a woman with bicycles on a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270916.jpg", "caption": "a man wearing a hat and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008781.jpg", "caption": "a man sitting on a couch with a dog wearing a cone", "annotations": [{"polygon": [[303, 206], [312, 201], [317, 201], [328, 201], [334, 198], [345, 192], [364, 189], [365, 211], [359, 212], [356, 210], [345, 212], [334, 215], [326, 218], [313, 221]], "text": "CITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eTV", "recog_valid": false, "glyph_recog_text": "CITY", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402041.jpg", "caption": "three elephants performing in a circus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270972.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139914.jpg", "caption": "a desk with a laptop, a mouse, a keyboard, and a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533158.jpg", "caption": "a red train is on the tracks near a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139956.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271058.jpg", "caption": "a bald man in a black shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140008.jpg", "caption": "a school bus reflected in a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271092.jpg", "caption": "a train traveling down the tracks in the mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533250.jpg", "caption": "a clock tower with a clock on it", "annotations": [{"polygon": [[195, 122], [195, 138], [266, 122], [265, 104]], "text": "1899", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "1899", "recog_valid": true, "glyph_recog_text": "1899", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008968.jpg", "caption": "a person skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271113.jpg", "caption": "a man in a suit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008999.jpg", "caption": "a laptop with the google logo on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140073.jpg", "caption": "a street sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402224.jpg", "caption": "three men cross country skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009017.jpg", "caption": "a plate with three donuts on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533308.jpg", "caption": "a horse eating grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009045.jpg", "caption": "a baseball player is holding a bat ready to hit the ball", "annotations": [{"polygon": [[106, 179], [137, 173], [141, 203], [113, 207]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533342.jpg", "caption": "a man in a hat and coat holding a skateboard", "annotations": [{"polygon": [[256, 281], [262, 303], [269, 304], [275, 301], [274, 287], [281, 282], [301, 283], [302, 288], [301, 306], [313, 303], [317, 296], [317, 276], [299, 264], [276, 264]], "text": "StAAB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Je", "recog_valid": false, "glyph_recog_text": "StAAB", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402297.jpg", "caption": "a dog sitting on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402308.jpg", "caption": "a black and red train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140179.jpg", "caption": "a man is sitting in a bathtub with a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271253.jpg", "caption": "a man on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140188.jpg", "caption": "a man sitting at a table eating a meal", "annotations": [{"polygon": [[279, 226], [289, 230], [288, 222], [293, 217], [300, 213], [310, 210], [324, 214], [332, 213], [329, 202], [323, 202], [313, 202], [302, 200], [295, 201], [290, 202], [286, 206], [281, 211], [279, 217], [279, 225]], "text": "INDIVIDUAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MDhoy", "recog_valid": false, "glyph_recog_text": "NOIVIDUA", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[338, 200], [341, 213], [350, 213], [358, 218], [366, 224], [371, 231], [376, 233], [385, 237], [391, 247], [395, 251], [407, 242], [401, 238], [397, 234], [388, 228], [381, 223], [374, 220], [369, 214], [364, 210], [360, 207], [352, 204], [339, 201]], "text": "REBELLION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REBELLION", "recog_valid": true, "glyph_recog_text": "REBELLON", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[159, 382], [145, 407], [163, 420], [175, 394], [159, 382]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271282.jpg", "caption": "a woman playing tennis on a court", "annotations": [{"polygon": [[138, 223], [203, 225], [202, 263], [137, 262]], "text": "Bell", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bell", "recog_valid": true, "glyph_recog_text": "Bell", "glyph_recog_ld": 1.0}, {"polygon": [[423, 236], [417, 239], [413, 244], [414, 248], [420, 248], [422, 246], [424, 264], [425, 268], [428, 268], [427, 248], [438, 250], [445, 247], [454, 246], [461, 247], [472, 245], [477, 238], [476, 236], [445, 237], [443, 229], [433, 238]], "text": "qnebec bonjourquebec.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Fneku", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533431.jpg", "caption": "a fruit stand with bananas, apples, and other fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533451.jpg", "caption": "a group of people sitting at a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009178.jpg", "caption": "a train is on the tracks next to a mountain", "annotations": [{"polygon": [[171, 235], [170, 268], [222, 270], [223, 232]], "text": "KOPAIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KAii", "recog_valid": false, "glyph_recog_text": "KOPAL", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402406.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[163, 211], [163, 211], [168, 211], [176, 207], [194, 192], [198, 191], [198, 199], [194, 206], [190, 214], [187, 216], [172, 225], [168, 227], [162, 227], [157, 227]], "text": "MT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "MT", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533484.jpg", "caption": "a group of people sitting on a couch playing a video game", "annotations": [{"polygon": [[54, 150], [79, 143], [84, 133], [89, 133], [91, 141], [100, 135], [103, 150], [65, 176], [51, 173]], "text": "Replay", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "eyoly", "recog_valid": false, "glyph_recog_text": "Replay", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271344.jpg", "caption": "a woman standing next to an old red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402420.jpg", "caption": "a man wearing a white shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533509.jpg", "caption": "a small airplane flying in the air with a float", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009231.jpg", "caption": "a black and white cat sitting next to a pile of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533522.jpg", "caption": "a woman holding a wine glass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140312.jpg", "caption": "a truck driving down a street in a village", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533529.jpg", "caption": "a woman is playing tennis on a tennis court", "annotations": [{"polygon": [[25, 113], [15, 166], [162, 191], [167, 134]], "text": "LOOOM", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "000M", "recog_valid": false, "glyph_recog_text": "LOOOM", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[24, 183], [19, 213], [47, 228], [119, 230], [120, 196]], "text": "sport", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "sport", "recog_valid": true, "glyph_recog_text": "sport", "glyph_recog_ld": 1.0}, {"polygon": [[1, 227], [-1, 257], [63, 269], [76, 244]], "text": "OOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "voor", "recog_valid": false, "glyph_recog_text": "OOR", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[0, 264], [-2, 304], [29, 309], [31, 285]], "text": "le", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "@", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[43, 274], [41, 312], [126, 328], [132, 326], [137, 290]], "text": "family", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "fa nil", "recog_valid": false, "glyph_recog_text": "family", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271395.jpg", "caption": "a police car is driving down the road with other vehicles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271417.jpg", "caption": "a tall ship docked at a dock with palm trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140373.jpg", "caption": "a pizza box with a slice missing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402527.jpg", "caption": "a young boy holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533601.jpg", "caption": "a motorcycle parked in front of a building", "annotations": [{"polygon": [[227, 164], [228, 176], [335, 161], [334, 146]], "text": "VATICANO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VATICANO", "recog_valid": true, "glyph_recog_text": "VATICANO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533603.jpg", "caption": "a cow standing on a sidewalk with a red collar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271495.jpg", "caption": "a man standing in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140445.jpg", "caption": "a narrow alley with many shops and people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533666.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271522.jpg", "caption": "a group of stuffed bears wearing christmas sweaters", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140450.jpg", "caption": "a man giving a skateboarder a high five", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402598.jpg", "caption": "a stop sign and street sign in front of a sunset", "annotations": [{"polygon": [[305, 298], [305, 337], [413, 337], [413, 297]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402621.jpg", "caption": "a pizza on a cooling rack with a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271622.jpg", "caption": "a seagull flying over a colorful umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140563.jpg", "caption": "a group of people wearing orange boxes on their heads", "annotations": [{"polygon": [[205, 145], [228, 128], [245, 190], [221, 209]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "B", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533784.jpg", "caption": "a display of snowboards in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140581.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533827.jpg", "caption": "a yellow taxi driving down the street with a man on the roof", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140613.jpg", "caption": "a stop sign on a metal fence", "annotations": [{"polygon": [[70, 300], [70, 198], [337, 208], [309, 315], [70, 300], [71, 301]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140623.jpg", "caption": "a black and white photo of a clock on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140630.jpg", "caption": "a stop sign with a sign that says all way", "annotations": [{"polygon": [[218, 81], [368, 72], [373, 135], [220, 143]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271729.jpg", "caption": "a group of surfboards on display in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271736.jpg", "caption": "a plate of food with broccoli and chicken", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140667.jpg", "caption": "a man in an orange shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140690.jpg", "caption": "a tennis match with two people playing on the court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271763.jpg", "caption": "an old black and red train engine sitting in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140693.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[427, 331], [413, 342], [396, 321], [411, 308], [419, 315]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271791.jpg", "caption": "a blue and white train car that is sitting on the ground", "annotations": [{"polygon": [[250, 163], [188, 212], [183, 203], [244, 153]], "text": "(01989) 564505", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S05795(68610)", "recog_valid": false, "glyph_recog_text": "101089F84505", "glyph_recog_ld": 0.23076982248475186}, {"polygon": [[241, 175], [245, 183], [203, 215], [198, 208]], "text": "Ross-on-Wye", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aAu-ue-ssoy", "recog_valid": false, "glyph_recog_text": "Roapefet", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533941.jpg", "caption": "a large clock with a skeleton on it", "annotations": [{"polygon": [[55, 310], [54, 333], [95, 340], [93, 316]], "text": "OC", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OC", "recog_valid": true, "glyph_recog_text": "oc", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[63, 390], [98, 415], [106, 411], [76, 384]], "text": "SVS", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "YS", "recog_valid": false, "glyph_recog_text": "sv9", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[1, 252], [26, 272], [53, 264], [24, 242]], "text": "II", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "T", "recog_valid": false, "glyph_recog_text": "i i", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271806.jpg", "caption": "an old black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402880.jpg", "caption": "an emirates airplane parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140736.jpg", "caption": "a toy fire truck with an american flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140738.jpg", "caption": "a man and a woman sitting in a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533955.jpg", "caption": "a woman with a heart cut out of her shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271814.jpg", "caption": "a woman standing in front of a vending machine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140758.jpg", "caption": "a parking meter on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140768.jpg", "caption": "a baby sitting on a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533998.jpg", "caption": "a car is being lifted onto a flatbed tow truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271862.jpg", "caption": "a street with a tall building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534016.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402945.jpg", "caption": "a baseball player throwing a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009745.jpg", "caption": "a speed boat is going down the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534034.jpg", "caption": "a woman sitting on a bench next to a stop sign", "annotations": [{"polygon": [[122, 132], [118, 170], [204, 168], [204, 132]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140816.jpg", "caption": "a person walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534044.jpg", "caption": "a train is on the tracks", "annotations": [{"polygon": [[69, 230], [70, 261], [85, 271], [94, 270], [102, 266], [102, 254], [98, 232], [91, 227], [85, 226]], "text": "0", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402973.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[76, 124], [70, 182], [214, 193], [225, 139]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140850.jpg", "caption": "three suitcases sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271924.jpg", "caption": "a remote control sitting on top of a wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009789.jpg", "caption": "a woman is holding a cake to a baby", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534094.jpg", "caption": "a gas station with a computer monitor on the screen", "annotations": [{"polygon": [[144, 11], [144, 39], [248, 38], [248, 9]], "text": "Quality", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Quality", "recog_valid": true, "glyph_recog_text": "Quality", "glyph_recog_ld": 1.0}, {"polygon": [[363, 1], [363, 48], [405, 48], [403, 0]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "5", "recog_valid": true, "glyph_recog_text": "5", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140898.jpg", "caption": "a side view mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271972.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534117.jpg", "caption": "a sign that says terra on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534124.jpg", "caption": "a cake with a suitcase on top", "annotations": [{"polygon": [[45, 228], [73, 226], [75, 339], [41, 344]], "text": "HELEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HELEN", "recog_valid": true, "glyph_recog_text": "工山JW", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271992.jpg", "caption": "a pink refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403064.jpg", "caption": "a car driving down the road with cows in the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140940.jpg", "caption": "a group of baseball players standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140943.jpg", "caption": "a bus stop with people standing outside and a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140946.jpg", "caption": "a street sign with a one way arrow on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140954.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272040.jpg", "caption": "a car is parked at a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534195.jpg", "caption": "a busy street with people walking and riding bikes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272059.jpg", "caption": "'bobby doerr' statue at the boston red sox museum", "annotations": [{"polygon": [[318, 218], [328, 221], [336, 226], [344, 232], [351, 231], [350, 222], [356, 216], [356, 213], [336, 201], [322, 196], [318, 202], [315, 214]], "text": "sex", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOX", "recog_valid": false, "glyph_recog_text": "sex", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140988.jpg", "caption": "a laptop computer, a notebook, a camera, a camera lens, a camera, a camera lens, a camera, a camera lens, a camera, a", "annotations": [{"polygon": [[215, 253], [223, 243], [230, 240], [270, 268], [261, 281]], "text": "KICK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KICK", "recog_valid": true, "glyph_recog_text": "KICK", "glyph_recog_ld": 1.0}, {"polygon": [[278, 272], [304, 287], [300, 302], [266, 283]], "text": "ASS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASS", "recog_valid": true, "glyph_recog_text": "ASS", "glyph_recog_ld": 1.0}, {"polygon": [[212, 259], [301, 312], [301, 315], [293, 327], [205, 275]], "text": "KICK ASS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIASTRS", "recog_valid": false, "glyph_recog_text": "KICK ASS", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[361, 344], [433, 326], [434, 346], [429, 348], [389, 359], [370, 361], [366, 361], [363, 359], [360, 349]], "text": "SQUAW", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SQUAW", "recog_valid": true, "glyph_recog_text": "SQUAW", "glyph_recog_ld": 1.0}, {"polygon": [[271, 467], [277, 496], [324, 478], [446, 391], [436, 373], [369, 405]], "text": "unifficialsquaw.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Bnofandequauw.com", "recog_valid": false, "glyph_recog_text": "nifficialsquaw.con", "glyph_recog_ld": 0.38888922839487305}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140992.jpg", "caption": "a man and a little girl on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140999.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141005.jpg", "caption": "three people posing for a photo in a hospital bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272081.jpg", "caption": "a blue and yellow train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009945.jpg", "caption": "a stop sign with an exit sign and a street sign", "annotations": [{"polygon": [[172, 108], [170, 169], [364, 199], [364, 143]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[219, 403], [219, 433], [308, 428], [307, 400]], "text": "EXIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "EXIT", "recog_valid": true, "glyph_recog_text": "EXIT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009946.jpg", "caption": "a man riding a wave on a surfboard", "annotations": [{"polygon": [[259, 130], [260, 144], [295, 152], [321, 163], [348, 186], [353, 200], [371, 202], [361, 181], [349, 166], [328, 150], [296, 133]], "text": "RESERVI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RESER", "recog_valid": false, "glyph_recog_text": "RESERVI", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534247.jpg", "caption": "a black vase with white flowers sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534259.jpg", "caption": "a train on the tracks with a mountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403198.jpg", "caption": "a young boy is sitting on the grass holding a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141056.jpg", "caption": "a display of pizza slices", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534297.jpg", "caption": "a woman in a hat holding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272155.jpg", "caption": "a man sitting at a desk with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272157.jpg", "caption": "a plate with a sandwich and a beer", "annotations": [{"polygon": [[283, 164], [283, 164], [283, 190], [313, 195], [314, 186], [324, 185], [328, 177], [327, 169]], "text": "HP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HP", "recog_valid": true, "glyph_recog_text": "HP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010015.jpg", "caption": "a man skiing down a slope with a few other people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272160.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[466, 94], [462, 142], [392, 152], [394, 103]], "text": "w", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "U", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141095.jpg", "caption": "a bus is parked at night in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534314.jpg", "caption": "a crowd of people standing in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403249.jpg", "caption": "a large blue airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141114.jpg", "caption": "a man holding bananas on a podium with other cyclists", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534332.jpg", "caption": "two double decker buses are parked on a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534331.jpg", "caption": "a red tree with a fire hydrant in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272219.jpg", "caption": "a truck with a large tire on top of it in the desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272218.jpg", "caption": "a red double decker bus parked in front of a pavilion", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272223.jpg", "caption": "a man taking a selfie with a toothbrush", "annotations": [{"polygon": [[194, 377], [213, 388], [224, 401], [226, 418], [246, 414], [239, 391], [231, 377], [213, 365], [198, 359]], "text": "DLEIFNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "DLEIFNE", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[158, 416], [167, 396], [179, 386], [189, 381], [189, 363], [175, 368], [160, 377], [151, 391], [142, 407], [142, 416]], "text": "TENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "21W437", "recog_valid": false, "glyph_recog_text": "TENNIS", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141180.jpg", "caption": "a laptop, a cell phone, and a headset on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010125.jpg", "caption": "a black and white photo of a sign for public market center", "annotations": [{"polygon": [[236, 122], [235, 164], [67, 135], [67, 80]], "text": "PUBLIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PUBLIC", "recog_valid": true, "glyph_recog_text": "PUBLIC", "glyph_recog_ld": 1.0}, {"polygon": [[217, 173], [206, 222], [13, 206], [14, 143]], "text": "MARKET", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MARKET", "recog_valid": true, "glyph_recog_text": "MARKET", "glyph_recog_ld": 1.0}, {"polygon": [[239, 237], [240, 276], [63, 284], [48, 258], [62, 231]], "text": "CENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CENTER", "recog_valid": true, "glyph_recog_text": "CENTER", "glyph_recog_ld": 1.0}, {"polygon": [[424, 374], [415, 390], [416, 399], [452, 405], [493, 410], [501, 404], [505, 385], [473, 381], [457, 383]], "text": "Coca Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "bcuColn", "recog_valid": false, "glyph_recog_text": "Coca Cola", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141200.jpg", "caption": "a woman walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141204.jpg", "caption": "a red brick building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010136.jpg", "caption": "a bookcase with many books and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403358.jpg", "caption": "a woman sitting at a table with cakes and pastries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534448.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010161.jpg", "caption": "a bowl of soup with meat and vegetables on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272310.jpg", "caption": "a man laying on the ground next to a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272312.jpg", "caption": "a small airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010179.jpg", "caption": "boston red sox, boston, boston, boston, boston, boston, boston, boston, boston, boston", "annotations": [{"polygon": [[156, 262], [178, 241], [196, 233], [219, 232], [240, 242], [252, 258], [242, 271], [220, 255], [203, 252], [178, 264], [169, 277]], "text": "BOSTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sos tor", "recog_valid": false, "glyph_recog_text": "BOSTON", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[288, 253], [311, 232], [342, 221], [379, 231], [384, 254], [363, 248], [346, 242], [319, 255], [293, 277]], "text": "BOSTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BoSTON", "recog_valid": false, "glyph_recog_text": "BOSTON", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403398.jpg", "caption": "a red fire hydrant with a sign on it", "annotations": [{"polygon": [[205, 218], [309, 221], [307, 253], [203, 249]], "text": "1989", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1989", "recog_valid": true, "glyph_recog_text": "1989", "glyph_recog_ld": 1.0}, {"polygon": [[190, 278], [316, 286], [312, 327], [187, 320]], "text": "OPEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UPEN", "recog_valid": false, "glyph_recog_text": "OPEN", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[166, 392], [197, 400], [250, 407], [300, 405], [323, 404], [323, 431], [293, 437], [253, 440], [197, 433], [166, 425]], "text": "UL FMN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ULEM", "recog_valid": false, "glyph_recog_text": "UL FMN", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[194, 445], [216, 450], [237, 453], [263, 455], [273, 455], [271, 477], [234, 475], [206, 470], [193, 469]], "text": "12 / 13", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "123613", "recog_valid": false, "glyph_recog_text": "12/13", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403404.jpg", "caption": "a man standing at a table with apples and other fruits", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534513.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[218, 109], [220, 136], [287, 128], [285, 101]], "text": "FedEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FedEx", "recog_valid": true, "glyph_recog_text": "FedEx", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010229.jpg", "caption": "a blue truck with a bike rack on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272384.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141331.jpg", "caption": "a traffic light and a street sign with birds flying above", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010275.jpg", "caption": "people are sitting on a train with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534564.jpg", "caption": "a display of vegetables at a market", "annotations": [{"polygon": [[67, 226], [95, 199], [99, 206], [76, 232], [67, 231]], "text": "PURPLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PORPUE", "recog_valid": false, "glyph_recog_text": "PUIRPLE", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[116, 216], [100, 242], [118, 261], [133, 244], [124, 236], [129, 221]], "text": "250", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "092", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534578.jpg", "caption": "a street sign with a blue and white sign", "annotations": [{"polygon": [[215, 147], [209, 284], [239, 284], [243, 145]], "text": "SYNC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0>20", "recog_valid": false, "glyph_recog_text": "0>ZU", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[242, 427], [244, 459], [365, 455], [364, 427], [308, 425]], "text": "Western", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Western", "recog_valid": true, "glyph_recog_text": "Western", "glyph_recog_ld": 1.0}, {"polygon": [[389, 421], [388, 454], [425, 453], [425, 429], [401, 421]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Av", "recog_valid": false, "glyph_recog_text": "AV", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534579.jpg", "caption": "a bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141372.jpg", "caption": "a large clock on a post in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141382.jpg", "caption": "a man holding a tennis racquet on a tennis court", "annotations": [{"polygon": [[268, 214], [267, 249], [481, 248], [472, 225], [359, 217], [269, 214]], "text": "J.P.Morran", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JPMoran", "recog_valid": false, "glyph_recog_text": "J.P.Morran", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272490.jpg", "caption": "a keyboard and mouse on a desk", "annotations": [{"polygon": [[338, 128], [338, 154], [409, 145], [413, 120], [407, 121], [407, 128], [350, 134], [350, 127]], "text": "fovari", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "fovari", "recog_valid": true, "glyph_recog_text": "fovari", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534662.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534663.jpg", "caption": "a large jetliner sitting on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272538.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010420.jpg", "caption": "a sign on a fence in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534711.jpg", "caption": "a woman with a suitcase in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534723.jpg", "caption": "two yellow school buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010444.jpg", "caption": "a donut on a white plate next to a teapot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272613.jpg", "caption": "a computer screen with a message on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403687.jpg", "caption": "a dog laying on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534763.jpg", "caption": "a busy city street with a yellow taxi cab", "annotations": [{"polygon": [[11, 176], [0, 184], [0, 245], [10, 247], [31, 239], [44, 240], [56, 241], [73, 242], [86, 254], [93, 257], [96, 244], [106, 242], [114, 241], [117, 234], [115, 210], [106, 207], [79, 207], [52, 202], [32, 182]], "text": "chevys", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Chans", "recog_valid": false, "glyph_recog_text": "chevys", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[3, 285], [2, 289], [0, 303], [0, 310], [4, 310], [10, 299], [21, 304], [60, 314], [65, 308], [63, 299], [34, 286], [20, 281]], "text": "Pizza", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Plee", "recog_valid": false, "glyph_recog_text": "Pizza", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141557.jpg", "caption": "a young boy and a dog sitting on a couch reading books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010498.jpg", "caption": "two men in uniform standing in front of a machine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534791.jpg", "caption": "a street with a traffic light and buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272647.jpg", "caption": "a desk with a computer and books on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272653.jpg", "caption": "a woman sitting on a train with her bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403737.jpg", "caption": "a skier is going down a slope in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010560.jpg", "caption": "a man with a beard and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010566.jpg", "caption": "a street sign with a red and black sign", "annotations": [{"polygon": [[372, 254], [458, 245], [459, 273], [372, 280]], "text": "THINK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "THINK", "recog_valid": true, "glyph_recog_text": "THINK", "glyph_recog_ld": 1.0}, {"polygon": [[8, 330], [0, 357], [31, 363], [48, 361], [53, 340]], "text": "fifth Avenue", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "86n09com", "recog_valid": false, "glyph_recog_text": "ftbheoue", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534867.jpg", "caption": "a man holding a snowboard", "annotations": [{"polygon": [[285, 302], [279, 248], [265, 249], [266, 240], [259, 233], [249, 233], [241, 239], [240, 248], [241, 250], [239, 256], [253, 298], [276, 303]], "text": "AP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dy", "recog_valid": false, "glyph_recog_text": "<0", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010581.jpg", "caption": "a trash can full of trash and paper", "annotations": [{"polygon": [[366, 475], [365, 483], [380, 473], [395, 459], [395, 453]], "text": "HOUSE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "5口A4", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[361, 283], [323, 316], [329, 327], [370, 293]], "text": "FAMILY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": false, "glyph_recog_text": "FAMILY", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[379, 288], [303, 349], [310, 359], [389, 299]], "text": "HOMELESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SS-WOH", "recog_valid": false, "glyph_recog_text": "HOMELESS", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010579.jpg", "caption": "a man holding a donut and a drink in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272741.jpg", "caption": "a young woman sitting in a kitchen with a cigarette in her mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272743.jpg", "caption": "a man walking down a sidewalk next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534892.jpg", "caption": "a woman sitting at a table with a large piece of cake", "annotations": [{"polygon": [[17, 155], [15, 201], [78, 210], [98, 165]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Y", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403826.jpg", "caption": "a baseball player in a black uniform throwing a ball", "annotations": [{"polygon": [[195, 330], [218, 301], [237, 314], [218, 346]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "寸", "recog_valid": false, "glyph_recog_text": "t", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534901.jpg", "caption": "a stop sign with a building in the background", "annotations": [{"polygon": [[118, 84], [157, 164], [179, 205], [201, 261], [194, 287], [175, 312], [137, 337], [30, 168], [97, 85]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010614.jpg", "caption": "a table with two plates of pizza and a glass of orange juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534925.jpg", "caption": "a tray of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010654.jpg", "caption": "a sign for a museum and a house in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141759.jpg", "caption": "a man standing on a street holding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272832.jpg", "caption": "a green and white bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010701.jpg", "caption": "a baseball player is throwing a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272846.jpg", "caption": "carolux airlines - carolux airlines", "annotations": [{"polygon": [[314, 263], [314, 263], [379, 265], [383, 258], [387, 259], [386, 264], [413, 265], [411, 284], [362, 282], [361, 288], [353, 289], [348, 284], [345, 282], [315, 281]], "text": "cargolux", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cargolux", "recog_valid": true, "glyph_recog_text": "cargolux", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010710.jpg", "caption": "a baseball player is at home plate waiting to hit the ball", "annotations": [{"polygon": [[397, 150], [398, 181], [432, 181], [430, 150]], "text": "Tg", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Tg", "recog_valid": true, "glyph_recog_text": "Tg", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272859.jpg", "caption": "a double decker bus driving down a snowy road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535010.jpg", "caption": "a large flock of sheep in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403940.jpg", "caption": "a large passenger jet on the tarmac at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535033.jpg", "caption": "a woman sitting on a couch reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141834.jpg", "caption": "virgin airlines boeing 757-200", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141848.jpg", "caption": "a yellow train with blue and yellow stripes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404015.jpg", "caption": "a skateboarder in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141873.jpg", "caption": "two birds sitting on a branch with leaves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535100.jpg", "caption": "a woman sitting in front of an airplane window", "annotations": [{"polygon": [[337, 16], [344, 0], [358, 0], [375, 43], [360, 77]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[370, 499], [366, 475], [412, 469], [417, 492]], "text": "ACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ACE", "recog_valid": true, "glyph_recog_text": "ACE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535113.jpg", "caption": "a street corner with a pole and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404050.jpg", "caption": "a bike is parked on a boat in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535125.jpg", "caption": "a red chair sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535136.jpg", "caption": "a hand holding a remote control", "annotations": [{"polygon": [[407, 341], [379, 382], [372, 377], [402, 336]], "text": "SONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SONY", "recog_valid": true, "glyph_recog_text": "4日81", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141922.jpg", "caption": "two men playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535139.jpg", "caption": "a large airplane with a space shuttle on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141920.jpg", "caption": "a man sitting on a toilet reading comics", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141927.jpg", "caption": "a large airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010867.jpg", "caption": "a fire hydrant is painted on a brick wall", "annotations": [{"polygon": [[109, 148], [162, 144], [245, 147], [293, 152], [282, 184], [208, 181], [185, 177], [155, 178], [146, 186], [106, 187]], "text": "HYDRANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NYPRANT", "recog_valid": false, "glyph_recog_text": "HYDRANT", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273049.jpg", "caption": "a living room with a couch, a chair, a bookcase and a lamp", "annotations": [{"polygon": [[95, 92], [132, 97], [142, 139], [95, 138]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "B", "recog_valid": true, "glyph_recog_text": "B", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404127.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535202.jpg", "caption": "a man sitting at a desk with a computer and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141988.jpg", "caption": "two women sitting at a table with pizza and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535211.jpg", "caption": "a horse drawn wagon with a load of goods on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273068.jpg", "caption": "a parking meter next to a red brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535212.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535218.jpg", "caption": "a man on a beach with a frisbee in his hand", "annotations": [{"polygon": [[405, 254], [383, 302], [416, 304], [416, 254]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "T", "recog_valid": false, "glyph_recog_text": "<", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142002.jpg", "caption": "a yellow biplane parked on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273082.jpg", "caption": "a large clock sitting in a field behind a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273088.jpg", "caption": "a man standing outside of a restaurant", "annotations": [{"polygon": [[165, 118], [163, 152], [206, 167], [206, 134]], "text": "Fish", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FYadl", "recog_valid": false, "glyph_recog_text": "Fish", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[79, 83], [76, 129], [153, 152], [155, 112]], "text": "Super", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Supear", "recog_valid": false, "glyph_recog_text": "Super", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535233.jpg", "caption": "a pole with several signs attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535234.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[108, 196], [108, 196], [118, 198], [128, 203], [134, 204], [151, 213], [152, 217], [149, 224], [145, 227], [133, 220], [126, 216], [120, 215], [108, 210]], "text": "TEXAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TEXAS", "recog_valid": true, "glyph_recog_text": "TEXAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535250.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142034.jpg", "caption": "a sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404183.jpg", "caption": "three double decker buses parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535265.jpg", "caption": "a dog wearing a cowboy hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273142.jpg", "caption": "a desk with a computer, monitor, and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011004.jpg", "caption": "a large building with a truck parked in front", "annotations": [{"polygon": [[172, 194], [247, 204], [247, 229], [173, 222]], "text": "DOLLAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UULLAI", "recog_valid": false, "glyph_recog_text": "DOLLAR", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142098.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142100.jpg", "caption": "a portrait of a man with a beard and a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011029.jpg", "caption": "two women pose with a large teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273204.jpg", "caption": "two men playing frisbee", "annotations": [{"polygon": [[110, 220], [102, 212], [148, 168], [153, 178]], "text": "NURD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NUR·D", "recog_valid": false, "glyph_recog_text": "NURD", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011075.jpg", "caption": "a woman is standing in front of a television screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273245.jpg", "caption": "a yellow and green double decker bus", "annotations": [{"polygon": [[159, 200], [159, 211], [161, 211], [159, 230], [190, 227], [192, 208], [194, 207], [193, 197], [171, 205], [170, 196]], "text": "TOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tour", "recog_valid": false, "glyph_recog_text": "TOUR", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404322.jpg", "caption": "a person riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142187.jpg", "caption": "a street sign on a pole with a blue and white striped pole", "annotations": [{"polygon": [[233, 63], [280, 48], [279, 31], [233, 45]], "text": "Ballard", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Ballard", "recog_valid": true, "glyph_recog_text": "8allani", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142191.jpg", "caption": "a white and red train at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404350.jpg", "caption": "a cat laying on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404351.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142229.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535464.jpg", "caption": "a boy is holding a kite on the beach", "annotations": [{"polygon": [[154, 208], [159, 208], [166, 204], [173, 198], [186, 188], [187, 185], [187, 193], [178, 204], [171, 208], [166, 217], [158, 219], [154, 214]], "text": "USS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "zong", "recog_valid": false, "glyph_recog_text": "USs", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404403.jpg", "caption": "a man in a suit and tie standing next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142262.jpg", "caption": "a woman in a black coat and sunglasses riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142263.jpg", "caption": "a street with a lot of buildings and pigeons", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142299.jpg", "caption": "two horses pulling a carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273371.jpg", "caption": "a kitchen with a refrigerator, a stove and a shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011229.jpg", "caption": "a boy is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011231.jpg", "caption": "a black and white photo of two men on a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011233.jpg", "caption": "a large open field with a pavilion and trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142321.jpg", "caption": "a man standing in the woods holding a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011265.jpg", "caption": "a bridge over a street with a clock tower in the middle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404486.jpg", "caption": "a conveyor belt with doughnuts on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535561.jpg", "caption": "a man and a woman standing in the rain holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142349.jpg", "caption": "a double decker bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142352.jpg", "caption": "a group of women playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142364.jpg", "caption": "a patio table and chairs under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011294.jpg", "caption": "a solar powered electric meter on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011299.jpg", "caption": "an italian airline plane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011302.jpg", "caption": "a fork and a cake with a car on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142413.jpg", "caption": "a refrigerator in a room with a desk and clothes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273497.jpg", "caption": "a cake with a teddy bear on top", "annotations": [{"polygon": [[168, 388], [191, 397], [220, 407], [263, 409], [322, 404], [348, 395], [399, 361], [390, 398], [372, 412], [349, 427], [294, 439], [261, 444], [210, 437], [166, 419]], "text": "JOOWON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "oowet", "recog_valid": false, "glyph_recog_text": "JOOWON", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[202, 326], [236, 326], [293, 327], [347, 322], [346, 377], [274, 383], [205, 382]], "text": "100", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1000", "recog_valid": false, "glyph_recog_text": "100", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142428.jpg", "caption": "a woman in a pink shirt is riding a horse", "annotations": [{"polygon": [[209, 374], [202, 414], [234, 420], [241, 377]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535650.jpg", "caption": "vintage rug with a pair of shoes and a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404587.jpg", "caption": "a woman standing next to a microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535669.jpg", "caption": "a group of baseball players standing on the field", "annotations": [{"polygon": [[25, 216], [26, 243], [64, 248], [66, 217], [31, 214]], "text": "22", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "22", "recog_valid": true, "glyph_recog_text": "22", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142466.jpg", "caption": "a boat filled with produce", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011422.jpg", "caption": "a stop sign with a no parking sign on it", "annotations": [{"polygon": [[186, 133], [180, 151], [184, 164], [175, 190], [185, 206], [205, 204], [217, 195], [227, 177], [247, 185], [262, 181], [264, 155], [277, 152], [290, 164], [307, 167], [327, 159], [332, 149], [340, 133], [341, 124], [353, 122], [361, 147], [383, 139], [375, 101], [405, 89], [412, 73], [408, 55], [390, 46], [378, 52]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[238, 213], [239, 236], [350, 204], [344, 180]], "text": "DANCIN'", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DANCIN", "recog_valid": false, "glyph_recog_text": "DANCIN", "glyph_recog_ld": 1.0}, {"polygon": [[227, 297], [222, 331], [289, 316], [275, 284]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALL", "recog_valid": true, "glyph_recog_text": "ALL", "glyph_recog_ld": 1.0}, {"polygon": [[312, 276], [324, 309], [391, 292], [390, 253]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[251, 355], [253, 394], [307, 383], [300, 348]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[253, 411], [257, 447], [384, 433], [377, 396]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[262, 463], [270, 499], [385, 494], [380, 450]], "text": "WITHIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WITHIN", "recog_valid": true, "glyph_recog_text": "WITHIN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142497.jpg", "caption": "a book on a table", "annotations": [{"polygon": [[324, 281], [328, 288], [409, 261], [404, 254]], "text": "COMPLETE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COMPLETE", "recog_valid": true, "glyph_recog_text": "GRRESRIE", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[346, 304], [411, 280], [416, 289], [349, 311]], "text": "EDITION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EDITION", "recog_valid": true, "glyph_recog_text": "ECITION", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[304, 242], [383, 215], [387, 224], [307, 251]], "text": "HAMLET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAMLET", "recog_valid": true, "glyph_recog_text": "MAYSEE", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535713.jpg", "caption": "a motorcycle parked in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273591.jpg", "caption": "a slice of pizza on a white plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273592.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535737.jpg", "caption": "a man holding a sign on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273600.jpg", "caption": "a person holding a samsung cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142534.jpg", "caption": "a mouse and keyboard on an orange surface", "annotations": [{"polygon": [[22, 44], [24, 59], [105, 29], [100, 14], [96, 17], [22, 43]], "text": "Microsoft", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Microsoft", "recog_valid": true, "glyph_recog_text": "Microsot", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142563.jpg", "caption": "a man riding a horse", "annotations": [{"polygon": [[414, 410], [413, 419], [451, 440], [457, 430]], "text": "095", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "695", "recog_valid": false, "glyph_recog_text": "0%5", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142570.jpg", "caption": "a window with a view", "annotations": [{"polygon": [[81, 243], [86, 264], [128, 247], [126, 234]], "text": "Astro", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Bstn", "recog_valid": false, "glyph_recog_text": "Astro", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[202, 342], [204, 370], [254, 345], [252, 333]], "text": "Astr", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Rey", "recog_valid": false, "glyph_recog_text": "Astr", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273641.jpg", "caption": "a train traveling down the tracks next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273650.jpg", "caption": "a man on a skateboard in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273653.jpg", "caption": "the los angeles kings celebrate their nhl championship win with a parade", "annotations": [{"polygon": [[352, 246], [353, 286], [430, 292], [431, 256]], "text": "LAKINGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LR KINGS", "recog_valid": false, "glyph_recog_text": "LAKINGS", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142587.jpg", "caption": "a stuffed animal sitting next to a laptop", "annotations": [{"polygon": [[69, 232], [101, 209], [104, 244], [76, 257]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "P", "recog_valid": true, "glyph_recog_text": "P", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011528.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404766.jpg", "caption": "a baseball player throwing a pitch on a field", "annotations": [{"polygon": [[221, 211], [240, 238], [258, 230], [259, 223], [253, 218], [249, 215], [247, 211], [239, 208], [229, 208]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011624.jpg", "caption": "two people sitting on a bench with skateboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273771.jpg", "caption": "a cat is sitting on a book shelf", "annotations": [{"polygon": [[294, 113], [295, 135], [307, 136], [308, 133], [351, 143], [350, 133], [307, 124], [307, 116]], "text": "NATURALEZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NAURAEZA", "recog_valid": false, "glyph_recog_text": "TR心E", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404849.jpg", "caption": "a snowboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535928.jpg", "caption": "a laptop computer sitting on a desk in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535961.jpg", "caption": "a refrigerator with a lot of magnets on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011673.jpg", "caption": "a crane is lifting a large statue in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011680.jpg", "caption": "a skateboarder doing a trick on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142757.jpg", "caption": "a zebra grazing on the ground in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011697.jpg", "caption": "a street with many signs and cars parked in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273850.jpg", "caption": "a baseball player in red and white uniform pitching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011707.jpg", "caption": "a silver airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142793.jpg", "caption": "a black and white photo of a skier in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273874.jpg", "caption": "a bunch of ties hanging on a wall", "annotations": [{"polygon": [[148, 374], [186, 391], [189, 413], [186, 413], [149, 393], [144, 389], [143, 380], [144, 376]], "text": "CLOUDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OOID", "recog_valid": false, "glyph_recog_text": "CLOUDS", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142803.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404948.jpg", "caption": "a man eating a large sandwich in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273879.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011737.jpg", "caption": "a baseball player throwing a pitch on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404964.jpg", "caption": "a group of people riding bikes on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273898.jpg", "caption": "a man in white is walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011758.jpg", "caption": "a toilet with writing on the wall and a sign on the door", "annotations": [{"polygon": [[321, 145], [318, 168], [362, 178], [363, 160]], "text": "SAID", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAID", "recog_valid": true, "glyph_recog_text": "SAID", "glyph_recog_ld": 1.0}, {"polygon": [[369, 162], [366, 182], [409, 195], [413, 172]], "text": "HE''S", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "H", "recog_valid": false, "glyph_recog_text": "HE\"S", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[288, 169], [288, 193], [332, 202], [335, 184]], "text": "TIRED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TIRED", "recog_valid": true, "glyph_recog_text": "TIRED", "glyph_recog_ld": 1.0}, {"polygon": [[393, 396], [393, 396], [391, 411], [391, 412], [416, 430], [425, 420]], "text": "BACK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "BANK", "recog_valid": false, "glyph_recog_text": "BACK", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[316, 345], [318, 364], [318, 364], [357, 381], [361, 369], [361, 369]], "text": "NEVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEVER", "recog_valid": true, "glyph_recog_text": "NEVER", "glyph_recog_ld": 1.0}, {"polygon": [[420, 363], [416, 377], [443, 396], [448, 381]], "text": "AND", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AND", "recog_valid": true, "glyph_recog_text": "AND", "glyph_recog_ld": 1.0}, {"polygon": [[386, 342], [386, 342], [384, 356], [414, 374], [414, 374], [414, 362]], "text": "BAGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BAGS", "recog_valid": true, "glyph_recog_text": "EAGS", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[401, 321], [401, 321], [403, 340], [435, 363], [435, 363], [434, 339]], "text": "LIKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Like", "recog_valid": false, "glyph_recog_text": "LIKE", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[364, 234], [369, 255], [446, 285], [447, 270]], "text": "WEIGHING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WELGHING", "recog_valid": false, "glyph_recog_text": "WEIGHING", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[285, 212], [292, 232], [357, 248], [361, 232]], "text": "TROUBLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TROUBLES", "recog_valid": true, "glyph_recog_text": "TROUBLES", "glyph_recog_ld": 1.0}, {"polygon": [[287, 190], [284, 208], [366, 229], [366, 212]], "text": "OVERTIME.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OVERTIME", "recog_valid": false, "glyph_recog_text": "OVERTIME.", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[372, 192], [376, 214], [395, 218], [425, 234], [446, 255], [454, 244], [414, 209]], "text": "WORKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WORKIN", "recog_valid": false, "glyph_recog_text": "WORKING", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011794.jpg", "caption": "a skateboarder is doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536087.jpg", "caption": "a baseball player throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273951.jpg", "caption": "three people sitting on a ski lift", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011812.jpg", "caption": "a yellow train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011826.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[449, 311], [461, 334], [480, 329], [468, 303], [453, 305]], "text": "38", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "38", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011838.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536127.jpg", "caption": "a bag of items including a purse, a bag of items, a bag of items, a bag of items, a bag of items, a bag of items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011842.jpg", "caption": "a yellow parking meter on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536145.jpg", "caption": "a group of people on bikes with their hands up", "annotations": [{"polygon": [[428, 309], [476, 319], [475, 277], [424, 277]], "text": "CAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CAN", "recog_valid": true, "glyph_recog_text": "CAN", "glyph_recog_ld": 1.0}, {"polygon": [[435, 309], [437, 330], [434, 358], [453, 355], [481, 363], [490, 351], [485, 327]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "YOU", "recog_valid": true, "glyph_recog_text": "YOU", "glyph_recog_ld": 1.0}, {"polygon": [[435, 358], [426, 358], [416, 395], [451, 393], [467, 400], [483, 401], [493, 404], [498, 371], [485, 363], [456, 356]], "text": "SEEM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SEEN", "recog_valid": false, "glyph_recog_text": "SEEM", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[425, 391], [425, 428], [479, 426], [485, 396]], "text": "NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "NOW", "recog_valid": true, "glyph_recog_text": "NOW", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274012.jpg", "caption": "a tree with red flowers on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274019.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274022.jpg", "caption": "a bus driving down a street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142969.jpg", "caption": "two trains are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536204.jpg", "caption": "a bird is standing on a wooden bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274074.jpg", "caption": "a stop sign on the side of the road", "annotations": [{"polygon": [[195, 212], [192, 256], [298, 282], [302, 232]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274079.jpg", "caption": "a man on a bike with a dog next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143017.jpg", "caption": "a fruit stand with many different types of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274105.jpg", "caption": "a black dog wearing a hat and sunglasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405197.jpg", "caption": "a woman holding a baby while sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143054.jpg", "caption": "a street sign with a pedestrian crossing", "annotations": [{"polygon": [[408, 298], [416, 334], [493, 346], [480, 309]], "text": "Ave", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ave", "recog_valid": true, "glyph_recog_text": "Ave", "glyph_recog_ld": 1.0}, {"polygon": [[477, 89], [439, 149], [417, 123], [453, 70]], "text": "DR", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Dr", "recog_valid": false, "glyph_recog_text": "D R", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[422, 178], [375, 254], [357, 282], [333, 248], [375, 178], [394, 142]], "text": "and", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "and", "recog_valid": true, "glyph_recog_text": "and", "glyph_recog_ld": 1.0}, {"polygon": [[246, 257], [246, 316], [319, 326], [309, 267]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 1.0}, {"polygon": [[310, 272], [320, 301], [360, 308], [345, 276]], "text": "rd", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rd", "recog_valid": true, "glyph_recog_text": "r d", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[279, 338], [294, 382], [322, 348], [292, 338]], "text": " H", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "工", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405207.jpg", "caption": "a bottle of wine sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536280.jpg", "caption": "a red and black bus parked next to a yellow bus", "annotations": [{"polygon": [[174, 352], [174, 369], [235, 384], [236, 370], [213, 357]], "text": "scofline", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "scofline", "recog_valid": true, "glyph_recog_text": "scafline", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405209.jpg", "caption": "a sign that is in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536290.jpg", "caption": "a couple sitting on a bench near a pond", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143085.jpg", "caption": "air canada boeing 767-300 air canada", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143089.jpg", "caption": "a fire hydrant on the side of the road", "annotations": [{"polygon": [[296, 229], [332, 174], [368, 207], [329, 257]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "人", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274170.jpg", "caption": "a tray of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405250.jpg", "caption": "a computer monitor with a lamp on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143110.jpg", "caption": "a bathroom with a sink and a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143139.jpg", "caption": "a train traveling down the tracks with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536363.jpg", "caption": "a bus sitting in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274248.jpg", "caption": "airbus a380 airbus a380 airbus a380 airbus a380 airbus a380 airbus a380 airbus a380 airbus a380", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012107.jpg", "caption": "two blue doors are on the side of a brick building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536413.jpg", "caption": "a skateboarder is doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143198.jpg", "caption": "a baseball player swinging a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012131.jpg", "caption": "a person riding a skateboard in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274275.jpg", "caption": "a man and woman in the ocean with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012146.jpg", "caption": "a kitchen with pots and pans on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012155.jpg", "caption": "a bus with a red and white design on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012158.jpg", "caption": "a little girl flying a kite on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143231.jpg", "caption": "a bus with a white and orange stripe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405400.jpg", "caption": "a sink with a toothbrush and a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274334.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[155, 239], [155, 210], [221, 209], [221, 238]], "text": "Bewer", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8一", "recog_valid": false, "glyph_recog_text": "Bewer", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[222, 207], [253, 203], [254, 231], [224, 233]], "text": "27", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "27", "recog_valid": true, "glyph_recog_text": "27", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012228.jpg", "caption": "a dog sitting in the back seat of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274376.jpg", "caption": "a white truck with a red and black paint job", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012240.jpg", "caption": "a man riding an elephant down a street", "annotations": [{"polygon": [[8, 237], [11, 266], [19, 280], [28, 288], [46, 287], [37, 239]], "text": "Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "Cola", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536534.jpg", "caption": "a man riding a bike on a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274402.jpg", "caption": "a person holding a remote control", "annotations": [{"polygon": [[275, 55], [312, 82], [306, 95], [268, 67]], "text": "POWER", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "POWER", "recog_valid": true, "glyph_recog_text": "POWER", "glyph_recog_ld": 1.0}, {"polygon": [[352, 123], [390, 148], [380, 158], [346, 135]], "text": "ON/OFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ON/OFF", "recog_valid": true, "glyph_recog_text": "ONAOFF", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405499.jpg", "caption": "a man sitting under a tree reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274430.jpg", "caption": "a baseball player standing on the mound with a crowd watching", "annotations": [{"polygon": [[371, 283], [498, 286], [498, 320], [371, 317]], "text": "MAYERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MAYERS", "recog_valid": true, "glyph_recog_text": "MAYERS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143358.jpg", "caption": "a man dressed in armor talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274437.jpg", "caption": "a baseball player throwing a pitch on a field", "annotations": [{"polygon": [[268, 180], [267, 231], [231, 230], [218, 208], [233, 178]], "text": "40", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "40", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143367.jpg", "caption": "a young boy wearing a jacket and tie", "annotations": [{"polygon": [[278, 336], [278, 346], [281, 355], [289, 362], [295, 366], [302, 369], [311, 369], [320, 369], [327, 366], [331, 364], [328, 356], [322, 360], [316, 362], [307, 362], [298, 361], [290, 355], [288, 348], [287, 342], [287, 337]], "text": "DUMBARTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "DISNATM", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012302.jpg", "caption": "a plane flying over a billboard with palm trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536595.jpg", "caption": "a fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274466.jpg", "caption": "a little girl standing on a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143416.jpg", "caption": "a white kitchen with a checkered floor and white cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012349.jpg", "caption": "a pink drink and a donut on a table", "annotations": [{"polygon": [[184, 186], [186, 206], [204, 216], [216, 219], [230, 221], [248, 222], [265, 220], [281, 216], [285, 215], [287, 195], [271, 200], [255, 201], [234, 202], [213, 199], [195, 192]], "text": "DUNKIN DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PUNKIN", "recog_valid": false, "glyph_recog_text": "DUNIN DONUTS", "glyph_recog_ld": 0.25000062499947917}, {"polygon": [[186, 206], [186, 224], [204, 235], [219, 238], [242, 241], [269, 239], [280, 236], [281, 217], [263, 221], [243, 222], [223, 220], [206, 217], [196, 211]], "text": "DUNKIN DONUTS ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DONUTS", "recog_valid": false, "glyph_recog_text": "DUNKIN DONUTS", "glyph_recog_ld": 0.4615388757393264}, {"polygon": [[41, 114], [46, 140], [74, 130], [94, 118], [113, 104], [120, 98], [116, 85], [100, 84], [86, 93], [66, 105]], "text": "DUNKIN DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "DUNKIN", "recog_valid": false, "glyph_recog_text": "DUAERBH DONUTS", "glyph_recog_ld": 0.21428627550980317}, {"polygon": [[45, 143], [49, 165], [79, 153], [104, 138], [122, 124], [124, 122], [121, 100], [107, 111], [87, 123], [60, 136]], "text": "DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DONUTS", "recog_valid": true, "glyph_recog_text": "DONUTS", "glyph_recog_ld": 1.0}, {"polygon": [[11, 189], [8, 201], [59, 179], [55, 171]], "text": "AMERICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AMERICA", "recog_valid": true, "glyph_recog_text": "MReOGp", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143436.jpg", "caption": "a bus and a truck are parked next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143439.jpg", "caption": "two fighter jets on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274518.jpg", "caption": "a cupcake sitting on a plate with a tea pot and silverware", "annotations": [{"polygon": [[208, 405], [221, 414], [241, 422], [262, 426], [289, 426], [291, 437], [274, 437], [259, 438], [237, 434], [221, 427], [201, 416]], "text": "ANGELINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "VNITTONN", "recog_valid": false, "glyph_recog_text": "ANGELINA", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536663.jpg", "caption": "a green and white bus driving down a street", "annotations": [{"polygon": [[101, 415], [101, 415], [118, 436], [248, 422], [238, 402]], "text": "SLOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SSLOW", "recog_valid": false, "glyph_recog_text": "SLO W", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012386.jpg", "caption": "kite surfers on the beach with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405606.jpg", "caption": "a large billboard with a picture of a woman on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012398.jpg", "caption": "a desk with two computers and a keyboard on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274559.jpg", "caption": "a group of people in a kitchen with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012421.jpg", "caption": "a train station with a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012428.jpg", "caption": "a cutting board with apples, a knife, and a spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012434.jpg", "caption": "a man skiing down a slope at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536728.jpg", "caption": "a group of jockeys and horses racing on a track", "annotations": [{"polygon": [[111, 291], [128, 344], [175, 336], [163, 288]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IC", "recog_valid": false, "glyph_recog_text": "合", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143528.jpg", "caption": "a skateboarder is in a tunnel with graffiti", "annotations": [{"polygon": [[226, 173], [223, 195], [268, 203], [272, 174]], "text": "MI5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EM", "recog_valid": false, "glyph_recog_text": "M15", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274602.jpg", "caption": "a dog eating a birthday cake", "annotations": [{"polygon": [[184, 347], [230, 326], [259, 329], [264, 334], [258, 343], [227, 357], [192, 360]], "text": "Apollo", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oollp", "recog_valid": false, "glyph_recog_text": "Apollo", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143553.jpg", "caption": "a man standing on a white mat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143563.jpg", "caption": "people walking on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012494.jpg", "caption": "a street sign that says needles alley", "annotations": [{"polygon": [[122, 194], [287, 176], [289, 202], [122, 224]], "text": "NEEDLESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEEDLESS", "recog_valid": true, "glyph_recog_text": "NEEDLESS", "glyph_recog_ld": 1.0}, {"polygon": [[323, 197], [325, 174], [398, 164], [399, 186]], "text": "ALLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALLEY", "recog_valid": true, "glyph_recog_text": "ALLEY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274642.jpg", "caption": "a woman and a baby in a high chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405729.jpg", "caption": "a bathroom with a toilet and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274684.jpg", "caption": "a large white rhino is sitting in the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012544.jpg", "caption": "a white bus driving down a street with cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143616.jpg", "caption": "a train traveling down the tracks with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012556.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143630.jpg", "caption": "a woman in white shirt and green skirt playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143637.jpg", "caption": "a group of people in red shirts and white ties", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274715.jpg", "caption": "a man and woman standing next to a car with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274745.jpg", "caption": "a man diving to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536902.jpg", "caption": "a cake with a leopard print design and purple and blue icing", "annotations": [{"polygon": [[101, 300], [97, 301], [98, 332], [117, 372], [136, 387], [147, 348], [113, 321], [104, 314]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[299, 312], [315, 403], [180, 415], [168, 353], [266, 316]], "text": "13th", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13th", "recog_valid": true, "glyph_recog_text": "13th", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012626.jpg", "caption": "a large building with a bridge over it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274770.jpg", "caption": "a boy holding two oranges in front of his eyes", "annotations": [{"polygon": [[47, 442], [168, 442], [168, 474], [47, 473]], "text": "CURSOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "CURSOR", "recog_valid": true, "glyph_recog_text": "CURSOR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536920.jpg", "caption": "two girls standing in the rain holding umbrellas", "annotations": [{"polygon": [[214, 105], [278, 136], [274, 158], [229, 137], [219, 124]], "text": "TRIK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRK", "recog_valid": false, "glyph_recog_text": "TRIK", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536926.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012641.jpg", "caption": "a street sign that says do not enter", "annotations": [{"polygon": [[182, 219], [187, 196], [197, 193], [214, 185], [218, 184], [221, 186], [221, 191], [222, 197], [220, 203], [215, 209]], "text": "DO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DO", "recog_valid": true, "glyph_recog_text": "DO", "glyph_recog_ld": 1.0}, {"polygon": [[240, 177], [302, 154], [303, 158], [296, 160], [294, 182], [238, 201]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOT", "recog_valid": true, "glyph_recog_text": "NOT", "glyph_recog_ld": 1.0}, {"polygon": [[182, 277], [270, 253], [283, 250], [286, 251], [288, 256], [286, 263], [285, 266], [284, 267], [288, 279], [177, 304]], "text": "ENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENTER", "recog_valid": true, "glyph_recog_text": "ENTER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536932.jpg", "caption": "a man holding an umbrella while standing next to a bike", "annotations": [{"polygon": [[353, 132], [353, 132], [349, 166], [416, 176], [419, 138]], "text": "FED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fed", "recog_valid": false, "glyph_recog_text": "FED", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[154, 405], [150, 414], [168, 422], [189, 436], [193, 433], [177, 419]], "text": "Kruizer", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "lruizer", "recog_valid": false, "glyph_recog_text": "新N", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536934.jpg", "caption": "a cat sleeping on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536960.jpg", "caption": "two bottles of wine sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536963.jpg", "caption": "a large airplane parked on a wet runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143749.jpg", "caption": "a parking meter on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143781.jpg", "caption": "a man is standing on a set of steps talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143797.jpg", "caption": "two people on skis standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012726.jpg", "caption": "a yellow garbage truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274876.jpg", "caption": "a man in white playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405980.jpg", "caption": "a couple of people playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012766.jpg", "caption": "a plate with a sandwich and soup on it", "annotations": [{"polygon": [[258, 259], [328, 244], [334, 262], [261, 274]], "text": "faste", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Paste", "recog_valid": false, "glyph_recog_text": "faste", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537055.jpg", "caption": "a man and a woman are posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537064.jpg", "caption": "a lighthouse on a pier with a sailboat in the distance", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537066.jpg", "caption": "a group of boats on the shore", "annotations": [{"polygon": [[85, 387], [89, 385], [120, 382], [146, 387], [147, 406], [141, 421], [112, 415], [101, 416]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "iot", "recog_valid": false, "glyph_recog_text": "13", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012810.jpg", "caption": "a baseball player is swinging at a pitch", "annotations": [{"polygon": [[293, 94], [294, 124], [328, 120], [327, 94]], "text": "POL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PO", "recog_valid": false, "glyph_recog_text": "POL", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143904.jpg", "caption": "a woman in a bikini posing with skis", "annotations": [{"polygon": [[85, 179], [91, 154], [206, 112], [201, 128]], "text": "UOIKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RPR", "recog_valid": false, "glyph_recog_text": "UOIKI", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[391, 509], [511, 512], [512, 491], [448, 482], [396, 488]], "text": "Waydelich", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "sa", "recog_valid": false, "glyph_recog_text": "Waydelich", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274978.jpg", "caption": "a man walking on the beach with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274986.jpg", "caption": "a red motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537139.jpg", "caption": "a train on the tracks", "annotations": [{"polygon": [[351, 326], [346, 348], [388, 394], [393, 366]], "text": "LINE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "LINE", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406068.jpg", "caption": "a red suitcase with a tag on it", "annotations": [{"polygon": [[334, 348], [334, 364], [273, 369], [222, 355], [184, 331], [188, 322], [221, 339], [259, 352], [296, 356], [334, 349]], "text": "POLO WORLD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RTHOA OTOd", "recog_valid": false, "glyph_recog_text": "POLO WORLD", "glyph_recog_ld": 0.20000079999919995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143944.jpg", "caption": "a young girl playing tennis with a racquet", "annotations": [{"polygon": [[70, 142], [139, 75], [157, 95], [82, 176]], "text": "Watch ", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "WateL", "recog_valid": false, "glyph_recog_text": "Watch", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[121, 147], [160, 106], [163, 130], [127, 166]], "text": "out", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dut", "recog_valid": false, "glyph_recog_text": "o u t", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[80, 222], [82, 264], [92, 264], [159, 197], [149, 188], [90, 219]], "text": "Chrissie", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "hnot", "recog_valid": false, "glyph_recog_text": "Chrissie", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[179, 435], [177, 466], [182, 473], [235, 442], [244, 425], [267, 384], [236, 396], [218, 403], [192, 415]], "text": "Staff at", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Staff at", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[90, 441], [109, 487], [182, 408], [156, 380], [106, 410]], "text": "VBTL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "87", "recog_valid": false, "glyph_recog_text": "VBTL", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143952.jpg", "caption": "a steam train on the tracks in a small town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012884.jpg", "caption": "a man on a skateboard doing a trick on a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143995.jpg", "caption": "a sign on a brick wall", "annotations": [{"polygon": [[78, 39], [164, 57], [153, 97], [69, 76], [78, 40]], "text": "FOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "FOR", "recog_valid": true, "glyph_recog_text": "FOR", "glyph_recog_ld": 1.0}, {"polygon": [[127, 117], [120, 153], [287, 193], [299, 153], [129, 117]], "text": "LAUGHS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LAUGHS", "recog_valid": true, "glyph_recog_text": "LAUGHS", "glyph_recog_ld": 1.0}, {"polygon": [[379, 172], [450, 190], [442, 230], [371, 209], [379, 172]], "text": "INC.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "INC", "recog_valid": false, "glyph_recog_text": "INC.", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[127, 326], [246, 369], [235, 398], [113, 354]], "text": "SECOND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SECOND", "recog_valid": true, "glyph_recog_text": "SECOND", "glyph_recog_ld": 1.0}, {"polygon": [[285, 379], [323, 394], [316, 411], [276, 396], [283, 378]], "text": "2000", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "2000", "recog_valid": true, "glyph_recog_text": "2000", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406145.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012930.jpg", "caption": "a sandwich on a plate", "annotations": [{"polygon": [[235, 98], [238, 81], [318, 101], [318, 114]], "text": "A BETT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Abe", "recog_valid": false, "glyph_recog_text": "A BETT", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012933.jpg", "caption": "a man riding a bike with a blue cover", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144049.jpg", "caption": "a table with a grill, a phone and a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144056.jpg", "caption": "a group of people in a kitchen with a microphone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275130.jpg", "caption": "a beach with people and umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013000.jpg", "caption": "a large jet airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144079.jpg", "caption": "a man holding a bear in a store", "annotations": [{"polygon": [[140, 105], [219, 89], [229, 143], [148, 157]], "text": "36,99", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "5699", "recog_valid": false, "glyph_recog_text": "36,99", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[383, 139], [416, 136], [418, 163], [385, 169]], "text": "109", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "109", "recog_valid": true, "glyph_recog_text": "109", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406230.jpg", "caption": "a table with a hot dog and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144089.jpg", "caption": "a red motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275168.jpg", "caption": "a school bus reflected in a side mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406244.jpg", "caption": "a man on a skateboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537326.jpg", "caption": "two men walking with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275190.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406263.jpg", "caption": "a young girl wearing a red headscarf stands in front of a bunch of bananas", "annotations": [{"polygon": [[291, 329], [291, 361], [313, 372], [328, 358], [324, 327]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "NI", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275206.jpg", "caption": "a bus and a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144139.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537367.jpg", "caption": "a woman standing on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537382.jpg", "caption": "a plate with a banana sandwich and a can of peanut butter", "annotations": [{"polygon": [[98, 2], [116, 76], [169, 58], [293, 47], [362, 52], [357, -3]], "text": "PLANTERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PLANIERS", "recog_valid": false, "glyph_recog_text": "PLANTERS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[225, 77], [225, 115], [337, 117], [337, 76]], "text": "Salted", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Salted", "recog_valid": true, "glyph_recog_text": "Salted", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144182.jpg", "caption": "a large clock on a red pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537422.jpg", "caption": "a miniature waffle iron, waffles, butter and syrup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537424.jpg", "caption": "a man eating a hot dog and a drink at a restaurant", "annotations": [{"polygon": [[390, 381], [399, 385], [403, 392], [410, 393], [417, 385], [426, 382], [434, 380], [439, 389], [447, 382], [454, 375], [468, 378], [462, 399], [452, 408], [434, 412], [410, 417], [399, 414], [390, 405], [389, 398]], "text": "Portillos", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Cortillai)", "recog_valid": false, "glyph_recog_text": "Portillos", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013144.jpg", "caption": "a refrigerator with a lot of magnets on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275327.jpg", "caption": "a large airplane sitting in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013188.jpg", "caption": "air canada airbus a320-214-2c-cw-cw-cw-cw-cw-cw-c", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013198.jpg", "caption": "a bench sitting on the sidewalk next to a street lamp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275348.jpg", "caption": "a horse race is being held in front of a large crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013215.jpg", "caption": "a small red and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537543.jpg", "caption": "a baseball player is standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275409.jpg", "caption": "a skateboarder is doing a trick on a rail", "annotations": [{"polygon": [[452, 374], [512, 375], [512, 419], [442, 419], [442, 386]], "text": "RA", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "RA", "recog_valid": true, "glyph_recog_text": "RA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144337.jpg", "caption": "a group of people flying kites in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406488.jpg", "caption": "a man in a suit and tie crossing the street with a child", "annotations": [{"polygon": [[485, 106], [485, 106], [497, 104], [501, 107], [512, 104], [511, 134], [481, 136]], "text": "Hin", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Hin", "recog_valid": true, "glyph_recog_text": "H", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013273.jpg", "caption": "a computer monitor, keyboard, mouse and a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144345.jpg", "caption": "a man sitting at a table with a laptop and a bunch of red balls", "annotations": [{"polygon": [[128, 386], [137, 386], [160, 374], [166, 371], [169, 377], [166, 386], [130, 401], [128, 398]], "text": "n&m", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "am", "recog_valid": false, "glyph_recog_text": "n&所", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013274.jpg", "caption": "a yellow tow truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013278.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013284.jpg", "caption": "a film strip with two teddy bears and a candle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144358.jpg", "caption": "a man holding a bottle of wine in front of an open refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013318.jpg", "caption": "police officers at the corner of flinders and flinders street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144391.jpg", "caption": "a man sitting on a bench", "annotations": [{"polygon": [[-2, 290], [120, 272], [129, 323], [0, 348]], "text": "LEXUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "LEXUS", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537608.jpg", "caption": "a man in a pink shirt and tie talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013325.jpg", "caption": "a fire hydrant in the grass next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406543.jpg", "caption": "a pan with vegetables and tofu in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275492.jpg", "caption": "a large truck with a large advertisement on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013355.jpg", "caption": "a man working on a surfboard in a workshop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144431.jpg", "caption": "a pizza on a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013362.jpg", "caption": "two baseball players walking on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013364.jpg", "caption": "a group of people on motorcycles in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013372.jpg", "caption": "boats in the harbor of a small town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537667.jpg", "caption": "a black and white photograph of a baseball player sliding into home", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144453.jpg", "caption": "a highway sign with a truck driving on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275529.jpg", "caption": "a bed with a white and red blanket", "annotations": [{"polygon": [[263, 238], [290, 268], [294, 265], [294, 260], [267, 234]], "text": "LIVERPOOL STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOECIRE", "recog_valid": false, "glyph_recog_text": "r中", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275530.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537685.jpg", "caption": "a pair of stuffed bears sitting on a book shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013397.jpg", "caption": "a black and white photo of a biplane flying over a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275544.jpg", "caption": "two people riding motorcycles on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537687.jpg", "caption": "a little boy and a little girl in a toy truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144480.jpg", "caption": "a railroad crossing sign on a pole in the middle of a city", "annotations": [{"polygon": [[138, 183], [150, 200], [276, 71], [261, 53]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "CROSSING", "glyph_recog_ld": 1.0}, {"polygon": [[139, 80], [156, 58], [274, 187], [259, 206]], "text": "RAILROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAOROAD", "recog_valid": false, "glyph_recog_text": "RAILROAD", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275556.jpg", "caption": "a woman in a purple sweatshirt holding a cake", "annotations": [{"polygon": [[319, 274], [302, 287], [291, 298], [289, 307], [307, 321], [311, 309], [322, 299], [328, 295]], "text": "ECI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SC", "recog_valid": false, "glyph_recog_text": "ECI", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406628.jpg", "caption": "two teddy bears sitting in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144484.jpg", "caption": "a bunch of bananas are on display at a market", "annotations": [{"polygon": [[208, 162], [244, 170], [251, 178], [248, 203], [236, 213], [213, 214], [207, 213], [197, 205]], "text": "40", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "导", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144488.jpg", "caption": "a steam train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406646.jpg", "caption": "a large indoor garden with lots of plants and potted plants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537729.jpg", "caption": "a train crossing a bridge over a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275604.jpg", "caption": "a black truck with a grill on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275612.jpg", "caption": "a toilet with a seat and a bidet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275630.jpg", "caption": "a pizza with a large piece missing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275631.jpg", "caption": "two green double decker buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013489.jpg", "caption": "a busy street with cars and buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144561.jpg", "caption": "a southwest airlines plane on the tarmac", "annotations": [{"polygon": [[434, 219], [444, 226], [480, 162], [474, 149]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SOUTHWEST", "recog_valid": true, "glyph_recog_text": "SOUTHAEST", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406708.jpg", "caption": "a woman standing next to a stop sign", "annotations": [{"polygon": [[213, 291], [213, 291], [299, 283], [302, 326], [213, 335]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013497.jpg", "caption": "pakistan, karachi, pakistan, pakistan, pakistan, pakistan, pakistan, pakistan, p", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275642.jpg", "caption": "a parking meter covered in snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144574.jpg", "caption": "a man and woman standing next to a sign", "annotations": [{"polygon": [[193, 110], [196, 122], [231, 98], [228, 87]], "text": "SQUARE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "1ON2", "recog_valid": false, "glyph_recog_text": "SOUARE", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013524.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[144, 311], [228, 370], [262, 333], [262, 333], [178, 276]], "text": "ALTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALTO", "recog_valid": true, "glyph_recog_text": "ALTO", "glyph_recog_ld": 1.0}, {"polygon": [[169, 335], [205, 358], [195, 373], [158, 349]], "text": "BULLYING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BULLYING", "recog_valid": true, "glyph_recog_text": "EULPINE", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144597.jpg", "caption": "a man is fixing a motorcycle on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144599.jpg", "caption": "a man and woman sitting on a bench outside a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013540.jpg", "caption": "a man surfing on a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144618.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013568.jpg", "caption": "four wine glasses with different labels on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537862.jpg", "caption": "a black remote control", "annotations": [{"polygon": [[148, 362], [151, 380], [208, 364], [201, 349]], "text": "ahhhh", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ahhhh", "recog_valid": true, "glyph_recog_text": "shhhh", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013576.jpg", "caption": "a man and woman sitting at a table with food", "annotations": [{"polygon": [[419, 280], [459, 312], [450, 326], [407, 293]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275741.jpg", "caption": "a black and white photo of people at a market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275758.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013616.jpg", "caption": "a motorcycle with gold paint on it is on display", "annotations": [{"polygon": [[401, 244], [401, 244], [399, 249], [405, 254], [409, 258], [416, 259], [423, 262], [431, 267], [439, 275], [445, 272], [447, 262], [438, 255], [432, 252], [421, 250], [414, 250], [405, 246]], "text": "GGER", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "gaGBR", "recog_valid": false, "glyph_recog_text": "GGER", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144727.jpg", "caption": "a model of a truck parked near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406895.jpg", "caption": "a group of people sitting on a ledge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275830.jpg", "caption": "a car driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275839.jpg", "caption": "a baseball player on the field with a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406920.jpg", "caption": "a street with cars and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538001.jpg", "caption": "a yellow and black sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013717.jpg", "caption": "a young boy in a purple shirt and purple pants is throwing a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013720.jpg", "caption": "a plate of food with an egg, a glass of milk and a jar of jam", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275868.jpg", "caption": "a street sign with a red light and a stop sign", "annotations": [{"polygon": [[212, 165], [420, 176], [418, 223], [208, 208]], "text": "Wellington", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wellington", "recog_valid": true, "glyph_recog_text": "Wellington", "glyph_recog_ld": 1.0}, {"polygon": [[325, 277], [355, 278], [354, 313], [324, 312]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "雅", "recog_valid": false, "glyph_recog_text": "B", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275870.jpg", "caption": "a double decker bus with the words book as intency course on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013731.jpg", "caption": "a table with fruit, bananas, and other ingredients", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144812.jpg", "caption": "we are beginning our desert by james neek", "annotations": [{"polygon": [[209, 326], [323, 308], [322, 289], [275, 281], [237, 286], [205, 295]], "text": "BEGINNING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REGINNIN", "recog_valid": false, "glyph_recog_text": "BEGINNING", "glyph_recog_ld": 0.7777780246910837}, {"polygon": [[159, 330], [197, 325], [196, 300], [181, 303], [157, 317]], "text": "NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOW", "recog_valid": true, "glyph_recog_text": "NOW", "glyph_recog_ld": 1.0}, {"polygon": [[205, 366], [207, 388], [239, 386], [304, 375], [308, 350], [286, 347], [256, 350], [244, 351]], "text": "DESCENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DESCENT", "recog_valid": true, "glyph_recog_text": "DESCENT", "glyph_recog_ld": 1.0}, {"polygon": [[330, 298], [337, 372], [363, 367], [355, 296]], "text": "MELTZER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MELTZER", "recog_valid": true, "glyph_recog_text": "EWJNU", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[386, 263], [391, 296], [436, 272], [436, 258], [409, 258]], "text": "Guest", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Guest", "recog_valid": true, "glyph_recog_text": "Guest", "glyph_recog_ld": 1.0}, {"polygon": [[348, 233], [356, 271], [392, 257], [433, 252], [463, 231], [462, 213], [446, 216], [416, 232], [377, 243], [375, 226]], "text": "Counterfeit", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Coittrrefeil", "recog_valid": false, "glyph_recog_text": "Counterfeit", "glyph_recog_ld": 0.5833336805552662}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406982.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013770.jpg", "caption": "a black cat sitting on a bookshelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275917.jpg", "caption": "a man sitting under an umbrella with a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275939.jpg", "caption": "a large airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013797.jpg", "caption": "pakistan's prime minister, yasir raza, addresses the media in pakistan's capital, Islamabad, on july 1, 2013", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144881.jpg", "caption": "a living room with a television, a chair, a table, and a plant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538099.jpg", "caption": "a horse with a bridle on its head", "annotations": [{"polygon": [[191, 469], [181, 498], [284, 500], [299, 472]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Photography", "recog_valid": true, "glyph_recog_text": "Photography", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144884.jpg", "caption": "three glass bottles with flowers in them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538105.jpg", "caption": "a steam train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538108.jpg", "caption": "a person sitting at a desk with a keyboard, mouse, and a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275969.jpg", "caption": "a black wall with graffiti on it", "annotations": [{"polygon": [[452, 214], [453, 214], [470, 211], [475, 229], [486, 221], [510, 222], [512, 237], [499, 242], [461, 243]], "text": "Bolck", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Bolck", "recog_valid": true, "glyph_recog_text": "Bolck", "glyph_recog_ld": 1.0}, {"polygon": [[455, 247], [460, 280], [481, 275], [486, 291], [504, 291], [502, 286], [511, 286], [509, 263], [477, 264], [472, 247]], "text": "Boys", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Boy", "recog_valid": false, "glyph_recog_text": "Boys", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[302, 184], [337, 189], [336, 214], [303, 210]], "text": "IndiA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tndia", "recog_valid": false, "glyph_recog_text": "IndiA", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[325, 216], [332, 289], [374, 297], [381, 288], [378, 221], [360, 213]], "text": "SMAC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "0E<", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[184, 197], [189, 332], [316, 315], [321, 210]], "text": "STUPID", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "专服", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[0, 172], [0, 324], [168, 325], [169, 174]], "text": "STUPID", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "etprn", "recog_valid": false, "glyph_recog_text": "STUPID", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538116.jpg", "caption": "a purse with a cell phone, a wallet, a bottle of water, a bottle of perfume, a bottle of shampoo, a bottle of conditioner, a", "annotations": [{"polygon": [[93, 42], [86, 173], [55, 173], [68, 41]], "text": "cloth paper", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ededyjop", "recog_valid": false, "glyph_recog_text": "9-0--", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[130, 40], [129, 97], [95, 84], [97, 32]], "text": "Studio", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "9YS", "recog_valid": false, "glyph_recog_text": "0-s", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[127, 104], [127, 181], [92, 174], [103, 94]], "text": "Somerset", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "goihanoe", "recog_valid": false, "glyph_recog_text": "静息与心", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[113, 240], [112, 290], [50, 289], [50, 243]], "text": "52", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "52", "recog_valid": true, "glyph_recog_text": "52", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538120.jpg", "caption": "a woman stands on the platform waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144907.jpg", "caption": "a bus and a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144906.jpg", "caption": "a young boy in a red shirt and white pants is running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407080.jpg", "caption": "a young boy in a red shirt is holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013868.jpg", "caption": "a luggage cart with luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538167.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[183, 153], [304, 154], [312, 166], [310, 177], [288, 209], [181, 211], [172, 208], [166, 195], [170, 165], [175, 155]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538170.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276037.jpg", "caption": "a donut and a cup of green tea", "annotations": [{"polygon": [[383, 357], [419, 339], [425, 353], [395, 369], [384, 361]], "text": "TULLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "TULLY", "recog_valid": true, "glyph_recog_text": "TULLY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407130.jpg", "caption": "a man in a shopping cart is jumping over a skateboarder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276070.jpg", "caption": "a black motorcycle parked in front of a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276105.jpg", "caption": "a black and white drawing of a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013979.jpg", "caption": "a jockey is sitting on a horse in the fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276128.jpg", "caption": "a man standing in front of a sign that says welcome to the village", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538272.jpg", "caption": "a man walking down the street with a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013995.jpg", "caption": "a kite with a long tail", "annotations": [{"polygon": [[268, 209], [280, 194], [300, 212], [291, 228]], "text": "138", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "138", "recog_valid": true, "glyph_recog_text": "138", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538285.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145078.jpg", "caption": "two black and white planes flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276164.jpg", "caption": "a teddy bear wearing a pink hat and scarf sits on a shelf with a stack of black and white ribbons", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276197.jpg", "caption": "a man in white playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145127.jpg", "caption": "a woman in red shirt holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145129.jpg", "caption": "a bicycle with a box on it with books and other items", "annotations": [{"polygon": [[130, 344], [153, 380], [153, 380], [176, 381], [150, 344], [150, 344]], "text": "VD911", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "T160", "recog_valid": false, "glyph_recog_text": "VD811", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014083.jpg", "caption": "a group of young boys playing soccer on a field", "annotations": [{"polygon": [[314, 187], [339, 232], [359, 216], [335, 171]], "text": "99", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "85", "recog_valid": false, "glyph_recog_text": "99", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145161.jpg", "caption": "a woman wearing a hat on her head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276244.jpg", "caption": "a boy playing a video game on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014103.jpg", "caption": "a snowboarder is jumping over a snow mound", "annotations": [{"polygon": [[205, 226], [233, 260], [254, 289], [273, 312], [296, 338], [277, 340], [256, 318], [241, 302], [218, 276], [198, 253], [186, 235]], "text": "FORUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FORUM", "recog_valid": true, "glyph_recog_text": "FORUM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014113.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145186.jpg", "caption": "a man sitting at a table with croissants and coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145189.jpg", "caption": "a man holding a hot dog in a box", "annotations": [{"polygon": [[355, 147], [366, 146], [376, 149], [382, 153], [386, 163], [403, 137], [400, 130], [397, 124], [392, 120], [382, 118], [373, 117], [365, 119], [355, 138]], "text": "JCREW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JCREN", "recog_valid": false, "glyph_recog_text": "JCREW", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538407.jpg", "caption": "a sign for public market farmers market", "annotations": [{"polygon": [[112, 175], [109, 232], [311, 167], [310, 102]], "text": "PUBLIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PUBIIC", "recog_valid": false, "glyph_recog_text": "PUBLIC", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[212, 338], [213, 381], [480, 362], [485, 348], [483, 314]], "text": "FARMERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FARMERS", "recog_valid": true, "glyph_recog_text": "FARMERS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276267.jpg", "caption": "a street sign on a pole with a street light", "annotations": [{"polygon": [[257, 260], [257, 260], [342, 221], [346, 239], [262, 275]], "text": "TELEGRAPH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TELEGRAPH", "recog_valid": true, "glyph_recog_text": "TELEGRAPH", "glyph_recog_ld": 1.0}, {"polygon": [[221, 243], [129, 147], [132, 134], [224, 228]], "text": "GREENWICH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GREENWICH", "recog_valid": true, "glyph_recog_text": "GREENWICR", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276283.jpg", "caption": "a man and woman are standing outside of a building", "annotations": [{"polygon": [[224, 80], [221, 102], [281, 111], [284, 87]], "text": "OPEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OPEN", "recog_valid": true, "glyph_recog_text": "OPEN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276295.jpg", "caption": "a bathroom sink with toothbrushes and toothpaste", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014152.jpg", "caption": "a man and a woman walking down a street with a camel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276311.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014180.jpg", "caption": "a man sitting on a chair with a snowboard", "annotations": [{"polygon": [[263, 109], [296, 189], [304, 187], [271, 107]], "text": "BURTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOLHNA", "recog_valid": false, "glyph_recog_text": "suhton", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538470.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145262.jpg", "caption": "a table with fruit and vegetables on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014203.jpg", "caption": "a group of people in chef hats preparing food", "annotations": [{"polygon": [[131, 169], [158, 195], [155, 204], [126, 181]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PIZZA", "recog_valid": true, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 1.0}, {"polygon": [[114, 173], [163, 211], [161, 222], [112, 184]], "text": "EXPRESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EXPRESS", "recog_valid": true, "glyph_recog_text": "安站中镜毛术艺", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014252.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145343.jpg", "caption": "a bus is parked in a museum with a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276417.jpg", "caption": "a young boy swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014281.jpg", "caption": "a man is smiling", "annotations": [{"polygon": [[283, 455], [267, 475], [269, 480], [279, 483], [317, 483], [316, 494], [336, 493], [335, 486], [426, 483], [419, 455]], "text": "doughnut", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "loufhnut", "recog_valid": false, "glyph_recog_text": "doughnut", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014283.jpg", "caption": "a baseball player holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538574.jpg", "caption": "a woman holding up two laptops with the words windows 8 on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145381.jpg", "caption": "a stuffed animal with a halloween hat sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014312.jpg", "caption": "a man and woman walking down a sidewalk at night", "annotations": [{"polygon": [[416, 133], [425, 205], [468, 180], [494, 134], [440, 133]], "text": "Rohr", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "go", "recog_valid": false, "glyph_recog_text": "Rohr", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276459.jpg", "caption": "two girls playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276460.jpg", "caption": "a woman holding a cake with candles on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014320.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[193, 222], [187, 268], [315, 276], [319, 232]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[465, 257], [472, 286], [511, 278], [511, 248]], "text": "KI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "KIN", "recog_valid": false, "glyph_recog_text": "K!", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538609.jpg", "caption": "a desk with a laptop and a bag of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014337.jpg", "caption": "a table topped with hot dogs and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538640.jpg", "caption": "a busy city street with many people walking and driving", "annotations": [{"polygon": [[35, 86], [33, 141], [39, 144], [41, 134], [71, 152], [71, 164], [80, 166], [82, 158], [106, 172], [104, 134]], "text": "THE LION KING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IaE", "recog_valid": false, "glyph_recog_text": "THIELKON KINO", "glyph_recog_ld": 0.07692378698170232}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014367.jpg", "caption": "a skateboarder is riding a ramp at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276514.jpg", "caption": "a cat wearing a green tie and a green cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145460.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276565.jpg", "caption": "a traffic light on a street with cars and a mountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014446.jpg", "caption": "a white teddy bear sitting on a shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276590.jpg", "caption": "a little boy brushing his teeth in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145518.jpg", "caption": "a sign on a street with a car in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407671.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145538.jpg", "caption": "a truck is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014477.jpg", "caption": "a black and white photo of a street with horse drawn carriages", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145551.jpg", "caption": "a white and gold jet airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145570.jpg", "caption": "a man standing in front of a white truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014502.jpg", "caption": "a man swinging a bat at a baseball in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538805.jpg", "caption": "two men standing in a kitchen talking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145593.jpg", "caption": "a young boy wearing a baseball glove", "annotations": [{"polygon": [[176, 145], [178, 179], [210, 181], [211, 174], [201, 156], [185, 145], [176, 143]], "text": "A's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "A's", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[194, 272], [212, 269], [233, 281], [248, 296], [245, 301], [226, 300], [195, 271]], "text": "Rawbings", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aaubngs", "recog_valid": false, "glyph_recog_text": "Rawbing", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538809.jpg", "caption": "a group of people standing outside a running room", "annotations": [{"polygon": [[152, 90], [152, 90], [153, 159], [153, 159], [199, 172], [199, 172], [252, 187], [251, 161], [186, 137], [185, 103]], "text": "Running", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RE", "recog_valid": false, "glyph_recog_text": "Ruanitng", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[185, 106], [188, 138], [255, 162], [255, 178], [267, 180], [268, 134]], "text": " unning", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "unn", "recog_valid": false, "glyph_recog_text": "animngg", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407739.jpg", "caption": "a baseball player swinging his bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407743.jpg", "caption": "a snowboard with a picture of a person on it", "annotations": [{"polygon": [[294, 236], [302, 243], [261, 278], [255, 271]], "text": "BIRTONO ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BURTONG", "recog_valid": false, "glyph_recog_text": "SH2", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145610.jpg", "caption": "a cell phone sitting on a table with a charger and cables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538838.jpg", "caption": "a fire hydrant with a sign on it", "annotations": [{"polygon": [[294, 336], [282, 341], [287, 351], [289, 365], [289, 382], [281, 398], [269, 408], [278, 421], [292, 410], [303, 390], [306, 371], [304, 355]], "text": "SERVICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SERVICN", "recog_valid": false, "glyph_recog_text": "30IA83S", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276696.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145631.jpg", "caption": "a baseball player holding a bat on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276703.jpg", "caption": "a police car parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407779.jpg", "caption": "a group of people standing around a train with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145645.jpg", "caption": "a red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276739.jpg", "caption": "a row of motorcycles lined up in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145690.jpg", "caption": "a woman is standing outside of a shop", "annotations": [{"polygon": [[143, 149], [150, 166], [169, 159], [191, 154], [199, 152], [196, 134], [169, 140], [153, 144]], "text": "OPP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OPP", "recog_valid": true, "glyph_recog_text": "OPP", "glyph_recog_ld": 1.0}, {"polygon": [[214, 135], [212, 154], [224, 155], [234, 156], [242, 157], [254, 162], [274, 169], [283, 151], [258, 142], [236, 137]], "text": "SHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHOP", "recog_valid": true, "glyph_recog_text": "SHOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145718.jpg", "caption": "a man riding a skateboard down a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538938.jpg", "caption": "a patio with tables and chairs under umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276794.jpg", "caption": "a clock on a building", "annotations": [{"polygon": [[392, 150], [393, 179], [312, 174], [313, 144]], "text": "EASON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EASON", "recog_valid": true, "glyph_recog_text": "EASON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014653.jpg", "caption": "two children sitting in bed with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407869.jpg", "caption": "a group of skateboards are sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538944.jpg", "caption": "a shelf with bananas and other items in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145741.jpg", "caption": "two computers and a keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145757.jpg", "caption": "a baseball player throwing a ball on a field", "annotations": [{"polygon": [[376, 217], [376, 251], [512, 250], [512, 216]], "text": "GATORLAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GATORLAND", "recog_valid": true, "glyph_recog_text": "GATORLAND", "glyph_recog_ld": 1.0}, {"polygon": [[0, 230], [0, 256], [70, 246], [68, 226]], "text": "SPORTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SPORTS", "recog_valid": true, "glyph_recog_text": "SPORTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145761.jpg", "caption": "a red double decker bus drives past a building under construction", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538985.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538990.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276848.jpg", "caption": "a stop sign with a sticker on it", "annotations": [{"polygon": [[171, 198], [166, 265], [339, 268], [339, 199]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276853.jpg", "caption": "a stop sign on a snowy street at night", "annotations": [{"polygon": [[310, 250], [308, 225], [315, 219], [385, 215], [393, 221], [393, 229], [388, 234], [379, 237], [379, 251], [313, 253]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407926.jpg", "caption": "a small plane sitting on top of a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014709.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[414, 97], [494, 101], [492, 180], [416, 180]], "text": "CDM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "@", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276870.jpg", "caption": "a man holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014736.jpg", "caption": "a tall building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145809.jpg", "caption": "a group of people standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407952.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014740.jpg", "caption": "a red fire hydrant on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407963.jpg", "caption": "a boy eating a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539041.jpg", "caption": "a bathroom counter with a vase, toothbrush, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014764.jpg", "caption": "a laptop, a book and a magazine on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539053.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[160, 238], [172, 254], [187, 247], [191, 242], [189, 236], [180, 224], [171, 228], [162, 233]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "25", "recog_valid": true, "glyph_recog_text": "品", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276909.jpg", "caption": "royal enfield bullet 350", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014774.jpg", "caption": "a bathroom with a sink, toilet and shower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539068.jpg", "caption": "a man standing next to a bump sign", "annotations": [{"polygon": [[76, 251], [234, 234], [235, 234], [240, 280], [79, 299]], "text": "BUMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUMP", "recog_valid": true, "glyph_recog_text": "BUMP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014795.jpg", "caption": "a soccer game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408016.jpg", "caption": "a small airplane flying over a motorcycle on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276956.jpg", "caption": "a man and a woman cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276961.jpg", "caption": "a bathroom mirror with a mirror and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145898.jpg", "caption": "a man is making donuts at a food stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014824.jpg", "caption": "a group of people dressed up as zombies", "annotations": [{"polygon": [[51, 145], [135, 59], [177, 102], [84, 201], [51, 158]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[281, 512], [297, 472], [320, 497], [313, 512]], "text": "OF", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "3O", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[202, 476], [231, 410], [237, 414], [209, 479]], "text": "CITYOFTHEDEADH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "UPVRPTHOOTANU", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 0.07692378698170232}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276975.jpg", "caption": "a green food truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277003.jpg", "caption": "a woman is leaning against a wall with a clock on it", "annotations": [{"polygon": [[46, 265], [52, 294], [78, 283], [72, 254]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277013.jpg", "caption": "a young boy with a white tank top holding a wii remote", "annotations": [{"polygon": [[272, 490], [274, 501], [298, 492], [313, 481], [323, 469], [327, 461], [324, 455], [320, 459], [317, 463], [310, 470], [302, 476], [292, 482]], "text": "BATMAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "母ATNA", "recog_valid": false, "glyph_recog_text": "BATMAN", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539167.jpg", "caption": "a baseball player is trying to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014886.jpg", "caption": "a white and blue bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277038.jpg", "caption": "a woman sitting on the sidewalk with a sign and a cat in a stroller", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277047.jpg", "caption": "a group of people riding dirt bikes on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145994.jpg", "caption": "a traffic light is on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014925.jpg", "caption": "a crowd of people standing on the platform waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277070.jpg", "caption": "a cat in a plastic bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145999.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539214.jpg", "caption": "a red and white train", "annotations": [{"polygon": [[320, 148], [319, 174], [373, 162], [374, 132]], "text": "REGIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REGIO", "recog_valid": true, "glyph_recog_text": "REGIO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014938.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539235.jpg", "caption": "a bunch of bananas hanging on a rack", "annotations": [{"polygon": [[161, 207], [167, 249], [272, 253], [271, 205]], "text": "Mufi", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": false, "glyph_recog_text": "Mufi", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539263.jpg", "caption": "a black dog is looking out the door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277122.jpg", "caption": "a man and a woman standing next to a fire hydrant", "annotations": [{"polygon": [[367, 87], [367, 87], [368, 104], [511, 94], [511, 73]], "text": "HAMBURGER", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "HAMBURGER", "recog_valid": true, "glyph_recog_text": "HAMBURGER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014985.jpg", "caption": "three people holding surfboards in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277134.jpg", "caption": "a clock on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277150.jpg", "caption": "a man and a woman standing next to a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277161.jpg", "caption": "an apple computer sitting on top of a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539313.jpg", "caption": "three people standing on a beach holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015049.jpg", "caption": "a box of donuts with different flavors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277204.jpg", "caption": "a small white and red airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539371.jpg", "caption": "a view of the neon sign of a restaurant in new york city", "annotations": [{"polygon": [[411, 323], [432, 361], [446, 357], [458, 343], [469, 336], [506, 327], [512, 324], [512, 297], [482, 302], [442, 310]], "text": "STAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SA", "recog_valid": false, "glyph_recog_text": "STAR", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[326, 344], [316, 354], [312, 365], [314, 371], [324, 373], [387, 362], [402, 359], [409, 352], [407, 342], [358, 345]], "text": "Ellen's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Elen's", "recog_valid": false, "glyph_recog_text": "Ellen's", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[180, 314], [175, 325], [170, 343], [174, 342], [183, 327], [216, 305], [224, 293], [224, 278]], "text": "STAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORUPO", "recog_valid": false, "glyph_recog_text": "STAR", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[335, 221], [321, 253], [413, 315], [445, 305]], "text": "STAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "STOR", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[317, 268], [320, 323], [406, 326], [411, 315], [339, 276]], "text": "STAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHM", "recog_valid": false, "glyph_recog_text": "STAR", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539376.jpg", "caption": "a herd of sheep grazing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015107.jpg", "caption": "a blender sitting on a counter next to a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015109.jpg", "caption": "a black and white photo of a bike parked on a railing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539395.jpg", "caption": "a blue train traveling down a track next to trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015122.jpg", "caption": "a baseball player in the middle of throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539434.jpg", "caption": "people walking down a street in an asian city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015151.jpg", "caption": "a man in a suit and tie on stage", "annotations": [{"polygon": [[95, 99], [271, 130], [270, 274], [91, 250]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FT", "recog_valid": false, "glyph_recog_text": "R", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146237.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277319.jpg", "caption": "a large airplane with a colorful design on it", "annotations": [{"polygon": [[136, 257], [129, 274], [186, 285], [191, 275], [193, 274], [196, 276], [198, 275], [199, 272], [197, 270], [187, 270], [175, 266], [177, 262], [174, 260], [164, 259], [141, 256], [140, 255]], "text": "Better", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Better", "recog_valid": true, "glyph_recog_text": "Better", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277321.jpg", "caption": "a group of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015185.jpg", "caption": "a hot dog with cheese and chips", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277334.jpg", "caption": "a group of people walking down a street with clocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015193.jpg", "caption": "a man in a pink shirt and tie talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015197.jpg", "caption": "a computer with a microphone and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015198.jpg", "caption": "a woman is sitting on the grass with a dog that says i'm adopted", "annotations": [{"polygon": [[3, 222], [110, 223], [108, 317], [3, 319]], "text": "I'M", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "I'M", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[142, 222], [509, 222], [507, 319], [144, 319]], "text": "ADOPTED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "abOPeD", "recog_valid": false, "glyph_recog_text": "ADOPTED", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539494.jpg", "caption": "a young boy holding a baseball bat", "annotations": [{"polygon": [[217, 421], [221, 442], [247, 450], [261, 450], [285, 454], [301, 454], [308, 445], [296, 404], [288, 400], [275, 398]], "text": "VTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TS", "recog_valid": false, "glyph_recog_text": "VTS", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408426.jpg", "caption": "a jet boat racing on a lake with a house in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015219.jpg", "caption": "a large pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146294.jpg", "caption": "a tennis player is about to hit a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015239.jpg", "caption": "a man in a police uniform standing in front of a shelf of drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277384.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015269.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015273.jpg", "caption": "a military aircraft and jeep in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277418.jpg", "caption": "a little girl in a hoodie eating a bowl of cereal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277432.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539596.jpg", "caption": "a woman in a blue apron is preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015311.jpg", "caption": "a digital clock on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277470.jpg", "caption": "a row of bicycles parked in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146401.jpg", "caption": "a white and red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408557.jpg", "caption": "a train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146432.jpg", "caption": "a black and white photo of a railroad track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408576.jpg", "caption": "a woman with red hair combing her hair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146439.jpg", "caption": "a cat sitting on top of a music equipment", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146440.jpg", "caption": "a male tennis player is holding his racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015379.jpg", "caption": "a scooter parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015382.jpg", "caption": "a man in green shirt and black shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015394.jpg", "caption": "photo of new york yankees game, september 1, 2012", "annotations": [{"polygon": [[100, 128], [96, 172], [370, 196], [375, 153]], "text": "MODELLS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MODELLS", "recog_valid": true, "glyph_recog_text": "MODELLS", "glyph_recog_ld": 1.0}, {"polygon": [[95, 178], [371, 202], [369, 222], [92, 199]], "text": "SPORTINGGOODS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPORTINGCODS", "recog_valid": false, "glyph_recog_text": "SPORTINGGOODS", "glyph_recog_ld": 0.8461539644969504}, {"polygon": [[419, 153], [416, 174], [494, 184], [497, 162]], "text": "Cotta", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cotta", "recog_valid": true, "glyph_recog_text": "Cotta", "glyph_recog_ld": 1.0}, {"polygon": [[425, 205], [423, 228], [483, 235], [486, 212]], "text": "Mo's", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Mo's", "recog_valid": true, "glyph_recog_text": "Mo's", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146469.jpg", "caption": "three snowboarders are going down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015400.jpg", "caption": "a black and white photo of a truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277544.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146481.jpg", "caption": "a child sitting in a chair reading a newspaper", "annotations": [{"polygon": [[229, 268], [222, 281], [280, 305], [289, 290]], "text": "netBettos", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hetBeroog", "recog_valid": false, "glyph_recog_text": "netBettos", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277562.jpg", "caption": "a jet fighter sitting on the runway with people around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015427.jpg", "caption": "three boys sitting on chairs", "annotations": [{"polygon": [[238, 281], [231, 255], [257, 223], [264, 245]], "text": "DMN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OL", "recog_valid": false, "glyph_recog_text": "DMN", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[268, 243], [258, 221], [287, 209], [297, 228]], "text": "BAIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAIL", "recog_valid": true, "glyph_recog_text": "BAL", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[80, 374], [80, 364], [120, 344], [122, 353]], "text": "SUPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SUPER", "recog_valid": true, "glyph_recog_text": "EUFSR", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[81, 390], [81, 379], [120, 360], [125, 370]], "text": "BOWL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BOWL", "recog_valid": true, "glyph_recog_text": "w:", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539719.jpg", "caption": "a laptop sitting on a desk with a poster on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277594.jpg", "caption": "an old truck is parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015451.jpg", "caption": "a couple of scooters parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146533.jpg", "caption": "a bus stop sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015496.jpg", "caption": "a couple of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277653.jpg", "caption": "a man standing next to an old airplane in a museum", "annotations": [{"polygon": [[352, 234], [332, 293], [411, 345], [511, 365], [511, 281], [437, 242]], "text": "HOX", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "POXL", "recog_valid": false, "glyph_recog_text": "HOX", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146582.jpg", "caption": "a large blue and white airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277677.jpg", "caption": "a train traveling down a track in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146626.jpg", "caption": "a baseball player is about to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277700.jpg", "caption": "a suitcase that is open on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539848.jpg", "caption": "a bus driving down a street next to a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015564.jpg", "caption": "two men in suits standing in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015582.jpg", "caption": "a young boy holding a cell phone in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146656.jpg", "caption": "a bus is stopped at a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277729.jpg", "caption": "a large billboard with a car on it in front of a busy city street", "annotations": [{"polygon": [[307, 114], [309, 131], [352, 118], [350, 100]], "text": "MPG.", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MPG.", "recog_valid": true, "glyph_recog_text": "MPG.", "glyph_recog_ld": 1.0}, {"polygon": [[157, 249], [157, 220], [245, 229], [244, 253], [243, 259], [231, 260], [231, 252]], "text": "Subway", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Subway", "recog_valid": true, "glyph_recog_text": "Subway", "glyph_recog_ld": 1.0}, {"polygon": [[10, 230], [10, 249], [83, 231], [101, 228], [101, 199]], "text": "FOOTACTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "FOOTACTIOV", "recog_valid": false, "glyph_recog_text": "FOOTACTION", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539879.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408829.jpg", "caption": "a bathroom with penguins on the walls and a blue rug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146700.jpg", "caption": "a closet with a fan and a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277788.jpg", "caption": "a table with vegetables and other items for sale", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277797.jpg", "caption": "a woman holding a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277798.jpg", "caption": "a teddy bear dressed in a red uniform and black hat", "annotations": [{"polygon": [[281, 413], [269, 418], [265, 433], [312, 456], [320, 441]], "text": "Harrod", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "k", "recog_valid": false, "glyph_recog_text": "Harrod", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277799.jpg", "caption": "a softball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277821.jpg", "caption": "a man with glasses and a beard sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015684.jpg", "caption": "a blender is on a counter with a glass of milk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146760.jpg", "caption": "a santa clause figurine on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277852.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277867.jpg", "caption": "a row of boats tied up to a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408954.jpg", "caption": "a man and woman talking to each other under a red tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408978.jpg", "caption": "a man in white and red playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277912.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015772.jpg", "caption": "a couple of figurines are sitting on a table with a donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015794.jpg", "caption": "a view of a donut factory through a glass window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540082.jpg", "caption": "people standing near a train station with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277950.jpg", "caption": "a man holding a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540110.jpg", "caption": "a man is standing on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146907.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015843.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015851.jpg", "caption": "a boy with a bike in the yard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277998.jpg", "caption": "a plate with a sandwich and salad on it", "annotations": [{"polygon": [[395, 274], [470, 287], [469, 313], [396, 305]], "text": "m", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[206, 105], [361, 106], [359, 157], [211, 147]], "text": "u", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": false, "glyph_recog_text": "u", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146933.jpg", "caption": "a stuffed animal wearing a hooded sweatshirt", "annotations": [{"polygon": [[250, 242], [258, 256], [285, 238], [277, 226]], "text": "CA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAA", "recog_valid": false, "glyph_recog_text": "CA", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540162.jpg", "caption": "a toilet with books on the shelf next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278060.jpg", "caption": "a sandwich on a plate", "annotations": [{"polygon": [[334, 180], [334, 170], [338, 162], [344, 157], [355, 155], [365, 156], [371, 162], [372, 171], [371, 179], [382, 176], [382, 167], [381, 158], [380, 150], [377, 144], [370, 140], [359, 136], [348, 138], [339, 140], [332, 143], [327, 149], [323, 156], [320, 163], [320, 171], [321, 180]], "text": "DR.BROWNS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "A新omg", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146988.jpg", "caption": "a person sitting in front of a television with a box of beer and a bottle of soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278066.jpg", "caption": "a woman holding a tennis racket", "annotations": [{"polygon": [[491, 129], [358, 133], [358, 222], [491, 222]], "text": "es", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "es", "recog_valid": true, "glyph_recog_text": "es", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015930.jpg", "caption": "a teddy bear sitting on top of a stack of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540229.jpg", "caption": "a baseball game is taking place in a stadium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278100.jpg", "caption": "a man on a motorcycle with an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015957.jpg", "caption": "a police officer walking a dog through an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015963.jpg", "caption": "a truck with a wooden bed on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147051.jpg", "caption": "a tennis player is about to hit the ball", "annotations": [{"polygon": [[389, 184], [389, 212], [512, 219], [511, 189]], "text": "XPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "XPE", "recog_valid": true, "glyph_recog_text": "XPE", "glyph_recog_ld": 1.0}, {"polygon": [[4, 165], [151, 172], [152, 222], [1, 212]], "text": "FILA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "FILA", "recog_valid": true, "glyph_recog_text": "FILA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540270.jpg", "caption": "a plastic container with a variety of food items", "annotations": [{"polygon": [[44, 144], [100, 135], [102, 158], [82, 164], [47, 166]], "text": "IZZE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IZZE", "recog_valid": true, "glyph_recog_text": "IZZE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540275.jpg", "caption": "a cell phone sitting on a dashboard in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147076.jpg", "caption": "a tennis player is about to serve the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540307.jpg", "caption": "a man is laughing while holding a hot dog", "annotations": [{"polygon": [[283, 325], [312, 314], [328, 311], [352, 310], [400, 309], [430, 313], [428, 257], [371, 258], [336, 260], [284, 275]], "text": "UCLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UCL2", "recog_valid": false, "glyph_recog_text": "UCLA", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[291, 353], [299, 352], [310, 352], [329, 349], [360, 349], [421, 347], [421, 318], [339, 323], [304, 322], [279, 327]], "text": "BRUN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRTCO", "recog_valid": false, "glyph_recog_text": "BRUN", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016027.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540317.jpg", "caption": "a girl in a red shirt holding a soccer ball", "annotations": [{"polygon": [[180, 236], [180, 236], [186, 231], [223, 232], [247, 235], [242, 263], [224, 261], [213, 259], [196, 259], [183, 258], [177, 254], [178, 242], [178, 242]], "text": "CSC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CSC", "recog_valid": true, "glyph_recog_text": "CSC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147108.jpg", "caption": "a cart with several suitcases on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016038.jpg", "caption": "a red and black train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147134.jpg", "caption": "a young boy holding a tennis racket on a tennis court", "annotations": [{"polygon": [[209, 280], [184, 316], [199, 336], [230, 295]], "text": "p", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "p", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016080.jpg", "caption": "a man in a green hat and green socks standing next to a stone wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278229.jpg", "caption": "a piece of luggage with stickers on it", "annotations": [{"polygon": [[251, 383], [222, 313], [251, 303], [280, 372]], "text": "NY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "XN", "recog_valid": false, "glyph_recog_text": "NY", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[248, 383], [238, 359], [206, 376], [216, 396]], "text": "I", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[462, 133], [473, 92], [495, 100], [484, 135]], "text": "WALES", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "S3TVM", "recog_valid": false, "glyph_recog_text": "SEWM", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278230.jpg", "caption": "a person sitting in front of a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278256.jpg", "caption": "a man and woman in formal attire standing next to a poster", "annotations": [{"polygon": [[140, 340], [159, 338], [177, 334], [194, 325], [202, 341], [188, 345], [179, 349], [164, 355], [153, 358], [150, 358]], "text": "UNDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNDER", "recog_valid": true, "glyph_recog_text": "UNDER", "glyph_recog_ld": 1.0}, {"polygon": [[145, 297], [150, 338], [217, 318], [217, 318], [211, 296]], "text": "Enchant", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cnchan", "recog_valid": false, "glyph_recog_text": "Enchant", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278258.jpg", "caption": "a table topped with two plates of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147196.jpg", "caption": "a fire hydrant with a red button on top", "annotations": [{"polygon": [[163, 174], [177, 168], [198, 187], [219, 196], [213, 210], [180, 198]], "text": "WELLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "WELLE", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016125.jpg", "caption": "a restaurant with a kitchen and a dining area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278297.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[350, 195], [331, 226], [318, 218], [340, 181]], "text": "LONEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LONEY", "recog_valid": true, "glyph_recog_text": "LONEY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409375.jpg", "caption": "volleyball club schweinfurt - schweinfurt volleyball club", "annotations": [{"polygon": [[134, 332], [118, 366], [145, 366], [148, 355], [141, 332]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "人", "recog_valid": false, "glyph_recog_text": "t", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540471.jpg", "caption": "a skateboarder is doing a trick on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540476.jpg", "caption": "two street signs that say greek and amethyst", "annotations": [{"polygon": [[128, 239], [278, 292], [273, 337], [120, 289]], "text": "AMETHYST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AMETHYST", "recog_valid": true, "glyph_recog_text": "AMETHYST", "glyph_recog_ld": 1.0}, {"polygon": [[157, 220], [253, 174], [257, 220], [189, 251], [159, 241]], "text": "CREEK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CREEK", "recog_valid": true, "glyph_recog_text": "CREEK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540495.jpg", "caption": "a group of motorcycles parked in front of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016246.jpg", "caption": "a table with donuts and apple cider", "annotations": [{"polygon": [[444, 113], [500, 85], [498, 71], [443, 94], [442, 113]], "text": "Design BOWLS", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Design", "recog_valid": false, "glyph_recog_text": "Dasign BOWL", "glyph_recog_ld": 0.45454595041277235}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147319.jpg", "caption": "a man sitting on a couch with a black cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016254.jpg", "caption": "a window display of pastries", "annotations": [{"polygon": [[221, 7], [207, 80], [465, 110], [467, 31]], "text": "Monarch", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Mlonatth", "recog_valid": false, "glyph_recog_text": "Monarch", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540547.jpg", "caption": "a truck with a bed full of furniture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147337.jpg", "caption": "a luggage cart with luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540554.jpg", "caption": "a train on the tracks with smoke coming out of the engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147342.jpg", "caption": "two skateboards mounted on metal brackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409488.jpg", "caption": "a man in a hat sitting on a bench reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409510.jpg", "caption": "a young boy pitching a baseball on a field", "annotations": [{"polygon": [[320, 202], [337, 194], [360, 202], [372, 213], [367, 228], [355, 219], [348, 216], [338, 218], [320, 224]], "text": "SLAMM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SLMn", "recog_valid": false, "glyph_recog_text": "SLAMM", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278439.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[227, 291], [207, 253], [381, 250], [403, 280]], "text": "Eetnies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Eetnies", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147392.jpg", "caption": "a bed with a hello kitty blanket and stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278494.jpg", "caption": "a male tennis player is holding a racket", "annotations": [{"polygon": [[213, 129], [214, 179], [178, 221], [179, 176]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[279, 378], [308, 305], [410, 286], [413, 362], [413, 362]], "text": "NME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "NME", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016361.jpg", "caption": "before and after pictures of a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147466.jpg", "caption": "three children sitting at a table with a white cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016403.jpg", "caption": "a man holding a hot dog and a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016414.jpg", "caption": "a closed down store with graffiti on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278565.jpg", "caption": "a white and green airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540716.jpg", "caption": "a man holding a sign and talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278573.jpg", "caption": "a woman with scissors in her underwear", "annotations": [{"polygon": [[278, 467], [356, 470], [350, 498], [275, 495]], "text": "HARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HARD", "recog_valid": true, "glyph_recog_text": "HARD", "glyph_recog_ld": 1.0}, {"polygon": [[362, 462], [362, 462], [364, 507], [417, 493], [428, 469], [429, 461], [362, 459]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[273, 429], [263, 457], [350, 465], [362, 429], [349, 420]], "text": "LEARN...", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LEARN", "recog_valid": false, "glyph_recog_text": "LEARN...", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[144, 418], [134, 464], [201, 465], [210, 429], [187, 419], [175, 420]], "text": "TIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TIME", "recog_valid": true, "glyph_recog_text": "TIME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540728.jpg", "caption": "a woman playing a video game on a wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147520.jpg", "caption": "two men cross country skiing down a snowy trail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016449.jpg", "caption": "a street sign pointing to a beauty clinic", "annotations": [{"polygon": [[142, 237], [139, 258], [146, 259], [283, 231], [288, 208]], "text": "STARIONARY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STATIONARY", "recog_valid": false, "glyph_recog_text": "STARIONARY", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[299, 204], [297, 228], [311, 226], [355, 218], [389, 208], [390, 187]], "text": "STORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STORE", "recog_valid": true, "glyph_recog_text": "STORE", "glyph_recog_ld": 1.0}, {"polygon": [[129, 284], [127, 306], [236, 291], [241, 266]], "text": "BEAUTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BEAUTY", "recog_valid": true, "glyph_recog_text": "BEAUTY", "glyph_recog_ld": 1.0}, {"polygon": [[269, 260], [267, 284], [377, 269], [377, 243]], "text": "CLINIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLINIC", "recog_valid": true, "glyph_recog_text": "CLINIC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016465.jpg", "caption": "two soccer players are playing on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278612.jpg", "caption": "three people in wetsuits riding surfboards in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540769.jpg", "caption": "a black train on a track with flowers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016494.jpg", "caption": "a plate with a sandwich and some bananas", "annotations": [{"polygon": [[220, 68], [220, 103], [241, 109], [249, 106], [340, 106], [337, 66]], "text": "Salted", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Salted", "recog_valid": true, "glyph_recog_text": "Salted", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540785.jpg", "caption": "a red plane flying through the air with smoke coming out of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016509.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[86, 180], [86, 152], [129, 158], [126, 182]], "text": "Be", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Be", "recog_valid": true, "glyph_recog_text": "Be", "glyph_recog_ld": 1.0}, {"polygon": [[130, 155], [178, 156], [180, 186], [130, 184]], "text": "the", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "the", "recog_valid": true, "glyph_recog_text": "the", "glyph_recog_ld": 1.0}, {"polygon": [[181, 183], [183, 154], [276, 166], [268, 190], [225, 195]], "text": "Lege", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lege.", "recog_valid": false, "glyph_recog_text": "Lege", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016525.jpg", "caption": "a large clock hanging from the ceiling in a shopping mall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278675.jpg", "caption": "a group of people standing in front of a food truck", "annotations": [{"polygon": [[264, 361], [263, 368], [356, 405], [357, 392]], "text": "www.seulonwheels.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "wwnseooioowheeis.com", "recog_valid": false, "glyph_recog_text": "w.seraath.core", "glyph_recog_ld": 0.30000034999982494}, {"polygon": [[249, 332], [245, 344], [356, 383], [357, 368]], "text": "AWESOMENESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AWESOMENESS", "recog_valid": true, "glyph_recog_text": "AWESOMENESS", "glyph_recog_ld": 1.0}, {"polygon": [[169, 127], [172, 158], [329, 133], [327, 82]], "text": "WHEELA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "wUFELS", "recog_valid": false, "glyph_recog_text": "WHEELA", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016538.jpg", "caption": "a red and white train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016556.jpg", "caption": "a skateboarder doing a trick in front of a crowd", "annotations": [{"polygon": [[82, 150], [88, 120], [197, 113], [196, 146]], "text": "Boardlife", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Boudlife", "recog_valid": false, "glyph_recog_text": "Boardlife", "glyph_recog_ld": 0.7777780246910837}, {"polygon": [[21, 323], [16, 299], [56, 286], [60, 308]], "text": "addas", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "gcias", "recog_valid": false, "glyph_recog_text": "addas", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[349, 290], [346, 320], [368, 336], [374, 325], [378, 310], [371, 292]], "text": "Boardlife", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "....", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540848.jpg", "caption": "a red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278710.jpg", "caption": "a group of people playing frisbee on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147646.jpg", "caption": "a group of people on a beach with kites", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278731.jpg", "caption": "a white bus parked next to a white car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540882.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409821.jpg", "caption": "a bag filled with clothes, shoes, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016616.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016653.jpg", "caption": "an old postcard shows a train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147735.jpg", "caption": "a woman riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016680.jpg", "caption": "banana powder, banana powder, banana powder, banana powder, banana powder, banana powder, banana powder, banana powder, banana powder, banana powder, banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409903.jpg", "caption": "a man is walking with two horses on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147760.jpg", "caption": "two men in red and white soccer uniforms are playing", "annotations": [{"polygon": [[154, 286], [152, 315], [174, 319], [184, 290]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541004.jpg", "caption": "a microwave oven that has been left in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016720.jpg", "caption": "a clock on top of a building with a flag flying in the background", "annotations": [{"polygon": [[22, 334], [14, 384], [162, 367], [171, 314]], "text": "COORS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "COORS", "recog_valid": true, "glyph_recog_text": "COORS", "glyph_recog_ld": 1.0}, {"polygon": [[344, 387], [344, 331], [489, 356], [489, 420]], "text": "FIELD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FIELI", "recog_valid": false, "glyph_recog_text": "FIELD", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409953.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541039.jpg", "caption": "a group of people holding umbrellas and signs", "annotations": [{"polygon": [[152, 319], [240, 321], [239, 354], [153, 354]], "text": "Thank", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Thank", "recog_valid": true, "glyph_recog_text": "Thank", "glyph_recog_ld": 1.0}, {"polygon": [[175, 356], [226, 357], [226, 388], [174, 388]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "you", "recog_valid": true, "glyph_recog_text": "you", "glyph_recog_ld": 1.0}, {"polygon": [[272, 359], [275, 382], [381, 371], [376, 352]], "text": "UMBERLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UMBRELLA", "recog_valid": false, "glyph_recog_text": "UMBERLA", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[284, 416], [288, 444], [353, 434], [399, 426], [391, 402]], "text": "HOMELESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HOMELESS", "recog_valid": true, "glyph_recog_text": "HOMELESS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016755.jpg", "caption": "a train yard with many trains parked on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147830.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409982.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "annotations": [{"polygon": [[238, 187], [273, 165], [278, 174], [263, 186], [243, 196]], "text": "CHERALO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EKEEALO", "recog_valid": false, "glyph_recog_text": "OHORAUD", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541072.jpg", "caption": "a birdhouse with a bird sitting on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278931.jpg", "caption": "a man and a woman sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147865.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016805.jpg", "caption": "a yellow fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278952.jpg", "caption": "a clock on a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016837.jpg", "caption": "a group of children standing near a train", "annotations": [{"polygon": [[134, 90], [179, 88], [176, 45], [135, 44]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "2", "recog_valid": true, "glyph_recog_text": "2", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541131.jpg", "caption": "a young boy wearing a baseball hat and a glove", "annotations": [{"polygon": [[207, 252], [224, 247], [238, 249], [261, 256], [304, 270], [308, 276], [308, 286], [300, 286], [263, 276], [227, 271], [200, 268]], "text": "Astros", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jsres", "recog_valid": false, "glyph_recog_text": "Astros", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278989.jpg", "caption": "a british airways plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147924.jpg", "caption": "a group of boats docked in a marina", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279009.jpg", "caption": "a small airplane parked on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541171.jpg", "caption": "a bus driving down a street with a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147973.jpg", "caption": "a crowd of people standing in a field with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147983.jpg", "caption": "a half eaten donut on a paper plate", "annotations": [{"polygon": [[188, 390], [191, 398], [228, 373], [224, 367]], "text": "Appearance", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Appearanice", "recog_valid": false, "glyph_recog_text": "Anjnekar", "glyph_recog_ld": 0.18181892561915847}, {"polygon": [[132, 212], [139, 221], [203, 195], [195, 182], [130, 210]], "text": "DONUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONUT", "recog_valid": false, "glyph_recog_text": "DONUT", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541203.jpg", "caption": "a man is walking by a fire hydrant that is spraying water", "annotations": [{"polygon": [[256, 148], [249, 183], [263, 186], [272, 187], [280, 187], [284, 151]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": true, "glyph_recog_text": "5", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279060.jpg", "caption": "a microwave oven sitting on a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541212.jpg", "caption": "a little boy walking in the rain with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279073.jpg", "caption": "a yellow food truck with a woman inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410152.jpg", "caption": "a blue train is driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279117.jpg", "caption": "a group of young men and women posing for a photo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541264.jpg", "caption": "three giraffes standing in the grass near trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279129.jpg", "caption": "a baseball player with a green and white mascot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148085.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541313.jpg", "caption": "a woman standing in front of a pink wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148101.jpg", "caption": "two children sitting at a table eating pizza and drinks", "annotations": [{"polygon": [[299, 181], [298, 178], [298, 146], [303, 140], [309, 140], [319, 140], [325, 139], [338, 138], [343, 138], [360, 137], [365, 137], [372, 136], [385, 136], [394, 135], [400, 135], [405, 139], [402, 175], [398, 181], [380, 182], [339, 181], [320, 182], [311, 184]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279175.jpg", "caption": "a man in a suit and hat at a wedding", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541338.jpg", "caption": "a table with bowls of food and a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017057.jpg", "caption": "a large truck driving down a street with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148146.jpg", "caption": "a man sitting on the floor watching a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279222.jpg", "caption": "a green and white bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541374.jpg", "caption": "a man holding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148165.jpg", "caption": "a man standing in front of a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017100.jpg", "caption": "a red and white train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279264.jpg", "caption": "a truck driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017147.jpg", "caption": "a white and red airplane flying in a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541439.jpg", "caption": "a couple of people sitting on surfboards in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017153.jpg", "caption": "two pizzas, a salad and a drink are in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017166.jpg", "caption": "a street sign for hoscombe school is shown in front of a building", "annotations": [{"polygon": [[231, 116], [258, 94], [256, 77], [228, 99]], "text": "GREEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "GREEN", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148239.jpg", "caption": "a kitchen with a stove, sink and table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017174.jpg", "caption": "a man standing next to a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148246.jpg", "caption": "a wall with a clock, mirror, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541463.jpg", "caption": "a detour ahead sign on a street corner", "annotations": [{"polygon": [[325, 171], [317, 195], [416, 234], [424, 211]], "text": "DETOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DETOUR", "recog_valid": true, "glyph_recog_text": "DETOUR", "glyph_recog_ld": 1.0}, {"polygon": [[321, 211], [303, 231], [389, 267], [398, 260], [401, 244]], "text": "AHEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHEAD", "recog_valid": true, "glyph_recog_text": "AHEAD", "glyph_recog_ld": 1.0}, {"polygon": [[261, 303], [254, 321], [323, 354], [335, 340]], "text": "BAILEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAILEY", "recog_valid": true, "glyph_recog_text": "BAILEY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017180.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017192.jpg", "caption": "a view of a king's cross station in london", "annotations": [{"polygon": [[132, 193], [131, 227], [235, 228], [234, 193]], "text": "King's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "King's", "recog_valid": true, "glyph_recog_text": "King’s", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[139, 341], [138, 398], [512, 405], [511, 343]], "text": "ANCRAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHDLAT", "recog_valid": false, "glyph_recog_text": "ANCRAS", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279345.jpg", "caption": "a man and a woman sitting at a table eating food", "annotations": [{"polygon": [[274, 153], [274, 173], [342, 184], [340, 165]], "text": "HANNAH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H警控镜层料", "recog_valid": false, "glyph_recog_text": "HANNAH", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017203.jpg", "caption": "a clock on a pole on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541506.jpg", "caption": "a white truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017236.jpg", "caption": "two men in a classroom with a man in a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541553.jpg", "caption": "a dog licking a birthday cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148361.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279438.jpg", "caption": "a young boy is swinging a bat at a baseball game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541591.jpg", "caption": "a group of people standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148375.jpg", "caption": "a house with a car parked in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279450.jpg", "caption": "a woman sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541596.jpg", "caption": "a group of people standing around a bus with a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148385.jpg", "caption": "a person sitting on a bench in a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017316.jpg", "caption": "a toilet with the words dirty japanese bathroom com", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279476.jpg", "caption": "a person feeding a bird on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279479.jpg", "caption": "a cat is sitting on a bed next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148411.jpg", "caption": "a table with two baskets of food and two forks", "annotations": [{"polygon": [[331, 71], [403, 87], [403, 101], [342, 88], [330, 80]], "text": "TimHowan", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UEWOH!I", "recog_valid": false, "glyph_recog_text": "TimHowan", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017356.jpg", "caption": "a boy eating a piece of pizza", "annotations": [{"polygon": [[264, 402], [271, 382], [278, 383], [289, 392], [295, 398], [305, 404], [308, 404], [314, 407], [319, 430], [315, 431], [310, 427], [305, 417], [296, 412], [286, 406], [278, 403]], "text": "DAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DOP", "recog_valid": false, "glyph_recog_text": "DAY", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[226, 439], [235, 421], [284, 405], [293, 416], [317, 471], [318, 486], [305, 492], [289, 494], [251, 501]], "text": "28", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "器", "recog_valid": false, "glyph_recog_text": "28", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[301, 498], [316, 487], [320, 481], [323, 479], [332, 478], [337, 483], [326, 498], [314, 509], [311, 513]], "text": "AT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AOD", "recog_valid": false, "glyph_recog_text": "AT", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279522.jpg", "caption": "a wii remote and a game controller on a table", "annotations": [{"polygon": [[175, 338], [218, 352], [207, 368], [169, 355]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wii", "recog_valid": true, "glyph_recog_text": "Wii", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148452.jpg", "caption": "a snowboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017401.jpg", "caption": "a person's feet in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541690.jpg", "caption": "a man holding a lamb in a pen at an event", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148480.jpg", "caption": "a group of people walking in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279563.jpg", "caption": "a banner reading 'tree beadle manning' is displayed in front of a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148502.jpg", "caption": "a man standing on a bed in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541721.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541725.jpg", "caption": "a suitcase with stickers and other items inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017451.jpg", "caption": "a parking meter on a railing near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279602.jpg", "caption": "a skateboarder is riding on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148541.jpg", "caption": "a man holding a red cup and eating a spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279616.jpg", "caption": "a group of men standing on top of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017472.jpg", "caption": "a cow is standing in the street next to a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017474.jpg", "caption": "people walking in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541764.jpg", "caption": "a man riding a cart down a street with a man pushing a cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148551.jpg", "caption": "a man in a suit and tie is talking on his cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279632.jpg", "caption": "a group of people posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017488.jpg", "caption": "a bunch of apples in a basket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541782.jpg", "caption": "a book with knitting workshop and yarn", "annotations": [{"polygon": [[174, 131], [256, 153], [255, 172], [248, 177], [168, 157]], "text": "KNITTING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KNIITING", "recog_valid": false, "glyph_recog_text": "KNITTING", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[165, 161], [280, 192], [268, 219], [163, 192]], "text": "WORKSHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WJORKSHOP", "recog_valid": false, "glyph_recog_text": "WORKSHOP", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[143, 231], [268, 263], [262, 279], [137, 246]], "text": "ELIZABETH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ELIZABETH", "recog_valid": true, "glyph_recog_text": "ELIZABETH", "glyph_recog_ld": 1.0}, {"polygon": [[118, 250], [280, 294], [272, 310], [113, 266]], "text": "ELIZABETH ZIMMERMANN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZIMMERMANN", "recog_valid": false, "glyph_recog_text": "ELIZABETH ZIMMERMANN", "glyph_recog_ld": 0.500000249999875}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148583.jpg", "caption": "a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017526.jpg", "caption": "a man doing a trick on a skateboard in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410743.jpg", "caption": "a street sign with an arrow pointing to the right", "annotations": [{"polygon": [[189, 173], [189, 173], [208, 175], [217, 162], [290, 174], [294, 193], [182, 190]], "text": "ANNEX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANNEX", "recog_valid": false, "glyph_recog_text": "ANNEX", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[254, 201], [253, 231], [290, 231], [291, 204]], "text": "RD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RD", "recog_valid": true, "glyph_recog_text": "RD", "glyph_recog_ld": 1.0}, {"polygon": [[118, 196], [109, 225], [238, 230], [231, 201]], "text": "ADMIRAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ADMIRAL", "recog_valid": true, "glyph_recog_text": "ADMIRAL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148629.jpg", "caption": "a clock on a table next to a radio", "annotations": [{"polygon": [[405, 284], [399, 296], [473, 321], [482, 313], [482, 311], [404, 284]], "text": "POESYSOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PolESY6oT", "recog_valid": false, "glyph_recog_text": "POESYSOT", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279726.jpg", "caption": "a train on the tracks in a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410808.jpg", "caption": "a large clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410817.jpg", "caption": "a street light with a green light on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279747.jpg", "caption": "a traffic light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279749.jpg", "caption": "a group of people standing in the snow with ski poles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541900.jpg", "caption": "a truck with an elephant on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410836.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017625.jpg", "caption": "a stop sign on a wall", "annotations": [{"polygon": [[222, 247], [219, 295], [333, 299], [347, 248]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148700.jpg", "caption": "a street with a traffic light and trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541918.jpg", "caption": "a man riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148727.jpg", "caption": "a santa clause riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279804.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148747.jpg", "caption": "a person is jumping in the air on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017675.jpg", "caption": "a large jetliner on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017683.jpg", "caption": "a group of buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279829.jpg", "caption": "a cat sitting in a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410904.jpg", "caption": "a dog is standing next to a hat and a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017696.jpg", "caption": "a group of people doing a trick on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017703.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542000.jpg", "caption": "aairbus a320-214 - airbus a320-214 - airbus a320-214 - airbus a320-214 - airbus a", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017712.jpg", "caption": "a long boat with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542020.jpg", "caption": "two people on surfboards riding waves in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279883.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542041.jpg", "caption": "a man wearing a hat", "annotations": [{"polygon": [[91, 283], [93, 300], [24, 338], [20, 321]], "text": "ACME", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EWSZ", "recog_valid": false, "glyph_recog_text": "ACME", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[16, 304], [19, 315], [51, 297], [49, 285], [29, 296]], "text": "About", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "1n09V", "recog_valid": false, "glyph_recog_text": "Arox!", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[371, 121], [379, 138], [424, 184], [447, 221], [448, 247], [463, 258], [485, 253], [479, 232], [469, 201], [432, 152], [404, 117]], "text": "Squirt", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "aunbs", "recog_valid": false, "glyph_recog_text": "Squirt", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[311, 147], [329, 194], [341, 190], [337, 174], [319, 140]], "text": "quirt", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "auinb", "recog_valid": false, "glyph_recog_text": "euirl", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148827.jpg", "caption": "a clock tower with a clock on it", "annotations": [{"polygon": [[0, 158], [57, 206], [60, 198], [51, 188], [49, 185], [42, 180], [23, 164], [22, 161], [17, 157], [3, 150]], "text": "odjunction", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "odjunction", "recog_valid": true, "glyph_recog_text": "sdunction", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017760.jpg", "caption": "a sign that says vendo scooter", "annotations": [{"polygon": [[309, 99], [309, 121], [416, 130], [417, 109]], "text": "VENDO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "VENDO", "recog_valid": true, "glyph_recog_text": "VENDO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410981.jpg", "caption": "a group of kids playing baseball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148841.jpg", "caption": "two men riding a motorcycle with a trailer attached", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279917.jpg", "caption": "a man laying on a bed with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148852.jpg", "caption": "a man and woman standing next to each other at a trade show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542069.jpg", "caption": "a cubicle with a desk and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411009.jpg", "caption": "a group of boys playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279946.jpg", "caption": "a woman sitting at a table with food", "annotations": [{"polygon": [[383, 160], [379, 163], [395, 204], [414, 197]], "text": "HOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "HOME", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[387, 158], [385, 161], [414, 196], [428, 183]], "text": "AMPTITHENTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HATRNEN", "recog_valid": false, "glyph_recog_text": "n", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[391, 136], [391, 147], [442, 127], [444, 117]], "text": "GENTLEMAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AMN", "recog_valid": false, "glyph_recog_text": "GENDEMAN", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[310, 108], [307, 116], [338, 143], [342, 141]], "text": "DRESSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ODL", "recog_valid": false, "glyph_recog_text": "盗国esdeo", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[317, 83], [312, 92], [344, 135], [347, 132], [332, 97]], "text": "ELONOUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ELOHIS", "recog_valid": false, "glyph_recog_text": "ELONOUS", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148884.jpg", "caption": "a box of donuts and other food items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411028.jpg", "caption": "a yellow train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017817.jpg", "caption": "a truck loaded with metal poles on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542146.jpg", "caption": "a banana and a biscuit on a paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411104.jpg", "caption": "two parking meters are on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542178.jpg", "caption": "a subway train is parked at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411109.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411111.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542186.jpg", "caption": "three shirts and ties on display on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280059.jpg", "caption": "two horses pulling a cart with boxes on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280068.jpg", "caption": "a vase filled with colorful flowers on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017945.jpg", "caption": "a blue umbrella with polka dots", "annotations": [{"polygon": [[22, 132], [22, 132], [19, 146], [19, 146], [121, 172], [121, 172], [123, 159], [123, 159]], "text": "GABRIELLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GABRIELLES", "recog_valid": true, "glyph_recog_text": "GABRIELLES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280119.jpg", "caption": "a man sitting in a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280123.jpg", "caption": "a black and white photo of a stop sign", "annotations": [{"polygon": [[349, 383], [351, 449], [186, 448], [186, 380]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[327, 264], [329, 294], [383, 297], [384, 268]], "text": "GORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GORE", "recog_valid": true, "glyph_recog_text": "GORE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018006.jpg", "caption": "a yellow double decker bus", "annotations": [{"polygon": [[116, 365], [116, 365], [139, 382], [242, 366], [210, 352]], "text": "STO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "sTa", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149079.jpg", "caption": "a bathroom with a toilet, sink and window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280158.jpg", "caption": "a person skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280160.jpg", "caption": "a boat tied to a rope in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542307.jpg", "caption": "a black and white photo of a school bus", "annotations": [{"polygon": [[93, 393], [184, 418], [167, 477], [93, 457]], "text": "OOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "0OL", "recog_valid": false, "glyph_recog_text": "OOL", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[222, 427], [308, 448], [293, 508], [202, 485]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "355", "recog_valid": false, "glyph_recog_text": "BUS", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411243.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018029.jpg", "caption": "a stop sign has texts written on it", "annotations": [{"polygon": [[164, 287], [312, 290], [311, 221], [166, 208]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411253.jpg", "caption": "a dead end sign on a street corner", "annotations": [{"polygon": [[176, 229], [174, 275], [288, 277], [290, 230]], "text": "DEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEAD", "recog_valid": true, "glyph_recog_text": "DEAD", "glyph_recog_ld": 1.0}, {"polygon": [[316, 230], [397, 231], [397, 277], [316, 277]], "text": "END", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "END", "recog_valid": true, "glyph_recog_text": "END", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280187.jpg", "caption": "a yellow car parked at a stop light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542338.jpg", "caption": "a red train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280199.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411295.jpg", "caption": "a person in the air on skis", "annotations": [{"polygon": [[137, 194], [131, 184], [154, 107], [163, 112]], "text": "VENENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZASAE", "recog_valid": false, "glyph_recog_text": "¥长U长NT", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[110, 85], [85, 115], [78, 107], [98, 84]], "text": "NT", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "LN", "recog_valid": false, "glyph_recog_text": "NT", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018099.jpg", "caption": "a bump sign on a street corner", "annotations": [{"polygon": [[221, 166], [315, 177], [312, 198], [216, 187]], "text": "BUMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUMP", "recog_valid": true, "glyph_recog_text": "BUMP", "glyph_recog_ld": 1.0}, {"polygon": [[235, 262], [277, 267], [272, 301], [234, 295]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "15", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149192.jpg", "caption": "a restaurant with tables and chairs outside", "annotations": [{"polygon": [[308, 77], [359, 100], [359, 74], [330, 62], [314, 65], [306, 65]], "text": "CVS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "191", "recog_valid": false, "glyph_recog_text": "cvs", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280270.jpg", "caption": "a table with a plate of food and a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542426.jpg", "caption": "a man and two boys are standing in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280291.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149221.jpg", "caption": "a bottle of wine and two glasses sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542444.jpg", "caption": "a man throwing a frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149233.jpg", "caption": "a man holding a baseball bat in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542452.jpg", "caption": "a crowd of people walking down a street with tents", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411384.jpg", "caption": "a parking meter on a city street", "annotations": [{"polygon": [[265, 60], [271, 64], [274, 58], [278, 51], [286, 44], [293, 41], [297, 39], [305, 37], [305, 32], [297, 32], [288, 37], [282, 41], [272, 51]], "text": "DENVER'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DENVER", "recog_valid": false, "glyph_recog_text": "DENVERS", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542458.jpg", "caption": "a clock tower with statues on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018183.jpg", "caption": "a large clock in a church with a sun shining through it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542475.jpg", "caption": "a man wearing a baseball cap", "annotations": [{"polygon": [[180, 109], [180, 109], [184, 105], [206, 59], [235, 50], [239, 54], [239, 71], [227, 78], [231, 93], [223, 98], [216, 109], [200, 118], [193, 118], [182, 123], [177, 117]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "B", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280339.jpg", "caption": "a bus driving down a street in a small town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280340.jpg", "caption": "a man holding a can of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018214.jpg", "caption": "a street with cars and signs in the middle of a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542502.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[279, 36], [279, 72], [271, 76], [108, 33], [109, -1], [163, 0], [211, 13], [256, 26], [274, 31]], "text": "HILLEGASS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HILLEGASS", "recog_valid": true, "glyph_recog_text": "HILLEGASS", "glyph_recog_ld": 1.0}, {"polygon": [[324, 71], [325, 93], [373, 83], [375, 69], [371, 58], [330, 66]], "text": "2600", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "2600", "recog_valid": true, "glyph_recog_text": "2600", "glyph_recog_ld": 1.0}, {"polygon": [[269, 83], [268, 136], [151, 154], [150, 108]], "text": "STUART", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STUARI", "recog_valid": false, "glyph_recog_text": "STUART", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[172, 235], [161, 253], [157, 320], [167, 346], [184, 354], [325, 377], [343, 332], [353, 309], [346, 274], [280, 252], [186, 230]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[194, 376], [190, 419], [323, 432], [325, 391]], "text": "DRIVING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DRIVING", "recog_valid": true, "glyph_recog_text": "DRIVING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411443.jpg", "caption": "a man and a child are playing tennis on a court", "annotations": [{"polygon": [[169, 310], [184, 283], [209, 289], [201, 315]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542541.jpg", "caption": "a group of fire trucks parked on the street", "annotations": [{"polygon": [[266, 307], [263, 320], [398, 356], [397, 338]], "text": "BOMBEROS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BOMBEROS", "recog_valid": true, "glyph_recog_text": "BOMBEROS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149331.jpg", "caption": "a crane is working on a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411478.jpg", "caption": "a pizza, a bottle of beer, and a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411492.jpg", "caption": "a man sitting at a desk in front of a wall of clocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280423.jpg", "caption": "a train at a train station with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411501.jpg", "caption": "two boys holding toothbrushes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280429.jpg", "caption": "a table with a bunch of post-it notes and a pair of scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018290.jpg", "caption": "a man swinging a bat at a baseball game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149366.jpg", "caption": "a motorcycle parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542582.jpg", "caption": "a bus and a train crossing the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149394.jpg", "caption": "a man in white tennis gear is about to hit a tennis ball", "annotations": [{"polygon": [[241, 47], [241, 47], [348, 49], [350, 81], [225, 79]], "text": "AEGON", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AEGON", "recog_valid": true, "glyph_recog_text": "AEGON", "glyph_recog_ld": 1.0}, {"polygon": [[359, 49], [418, 49], [424, 74], [425, 84], [360, 81]], "text": "CHA", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "CHA", "recog_valid": true, "glyph_recog_text": "CHA", "glyph_recog_ld": 1.0}, {"polygon": [[87, 175], [412, 173], [414, 185], [416, 203], [409, 221], [387, 230], [87, 236]], "text": "HAIPIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAIPIO", "recog_valid": true, "glyph_recog_text": "HAIPIO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542614.jpg", "caption": "a street sign is on a pole in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149410.jpg", "caption": "four pictures of dogs wearing blue vests and one with a leash", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280484.jpg", "caption": "a man loading wood into a garbage truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149414.jpg", "caption": "a bus driving down a street with a church in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542630.jpg", "caption": "a baseball game with a crowd watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149424.jpg", "caption": "a man and woman standing next to a stop sign", "annotations": [{"polygon": [[133, 169], [230, 160], [236, 159], [230, 129], [130, 137]], "text": "Dont", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dot", "recog_valid": false, "glyph_recog_text": "Dont", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[77, 283], [297, 257], [289, 166], [71, 189]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[369, 162], [490, 162], [482, 110], [369, 109]], "text": "KRISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "KRISTA", "recog_valid": true, "glyph_recog_text": "KRISTA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280508.jpg", "caption": "a refrigerator with a variety of drinks and food", "annotations": [{"polygon": [[203, 237], [219, 216], [233, 217], [241, 221], [218, 256]], "text": "Fanta", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fam", "recog_valid": false, "glyph_recog_text": "Fanta", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[243, 232], [252, 216], [270, 217], [274, 220], [252, 254]], "text": "Fanta", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tam", "recog_valid": false, "glyph_recog_text": "Fanta", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[289, 235], [305, 216], [323, 216], [324, 221], [305, 253]], "text": "Fanta", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "fani", "recog_valid": false, "glyph_recog_text": "Fanta", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542658.jpg", "caption": "a man jumping to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411603.jpg", "caption": "a colorful food truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280532.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[141, 241], [139, 289], [200, 304], [201, 259]], "text": "Eight", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Eight", "recog_valid": true, "glyph_recog_text": "Eigh", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[219, 265], [217, 300], [260, 309], [261, 278]], "text": "Mile", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mile", "recog_valid": true, "glyph_recog_text": "Mile", "glyph_recog_ld": 1.0}, {"polygon": [[129, 330], [130, 372], [281, 357], [278, 313]], "text": "Shadyside", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shadyside", "recog_valid": true, "glyph_recog_text": "Shadyside", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149467.jpg", "caption": "a baseball player is swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018395.jpg", "caption": "a vase with three red roses in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542695.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[274, 201], [274, 222], [329, 211], [328, 191]], "text": "MTSSACHUSETTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MASSACHUSETTS", "recog_valid": false, "glyph_recog_text": "修", "glyph_recog_ld": 7.692301774442356e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280554.jpg", "caption": "a man sitting on the sidewalk with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018432.jpg", "caption": "a man sitting at a table with plates of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018441.jpg", "caption": "a large jet airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018467.jpg", "caption": "a white container with broccoli, carrots, and broccoli", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149551.jpg", "caption": "a statue of an elephant with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149556.jpg", "caption": "two dell mouses", "annotations": [{"polygon": [[289, 141], [307, 145], [317, 128], [329, 117], [325, 115], [312, 114], [304, 121], [297, 130]], "text": "DELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DO", "recog_valid": false, "glyph_recog_text": "DELL", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[154, 252], [154, 252], [172, 257], [180, 247], [190, 238], [195, 232], [210, 223], [205, 221], [190, 219], [187, 222], [180, 224]], "text": "DELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DKLL", "recog_valid": false, "glyph_recog_text": "DELL", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018482.jpg", "caption": "a woman standing on a sidewalk next to a street sign", "annotations": [{"polygon": [[257, 116], [336, 107], [337, 130], [256, 137]], "text": "Brooklyn", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Brooklyn", "recog_valid": true, "glyph_recog_text": "Brooklyn", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411703.jpg", "caption": "a sandwich and salad on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411705.jpg", "caption": "a man in a safety vest standing next to a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542779.jpg", "caption": "a group of people standing around a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411717.jpg", "caption": "a muffin on a plate with a cup of tea", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018501.jpg", "caption": "a group of children brushing their teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018513.jpg", "caption": "a traffic light tree with many different colored lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280670.jpg", "caption": "a young boy standing in front of a goal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411748.jpg", "caption": "the clock tower at the ferry building in san francisco", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149610.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018559.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149634.jpg", "caption": "a train car with the word the grain on it", "annotations": [{"polygon": [[161, 271], [277, 279], [277, 305], [160, 304]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411792.jpg", "caption": "a traffic light with a no turn on red sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411824.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149682.jpg", "caption": "a woman holding a teddy bear in her lap", "annotations": [{"polygon": [[300, 151], [298, 159], [336, 190], [361, 202], [369, 196], [328, 164], [315, 154], [301, 152]], "text": "WHISPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "whisper", "recog_valid": false, "glyph_recog_text": "WHISPER", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[302, 176], [329, 216], [337, 212], [319, 183], [312, 176], [306, 172], [301, 176]], "text": "whisper", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "whisper", "recog_valid": true, "glyph_recog_text": "wiinpd!", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018627.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411844.jpg", "caption": "an air asia airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542915.jpg", "caption": "a man in a suit and tie cutting into a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280778.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280805.jpg", "caption": "a man standing next to a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018661.jpg", "caption": "a taxi cab is parked on the side of the road", "annotations": [{"polygon": [[411, 98], [410, 127], [447, 132], [444, 98]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411885.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018683.jpg", "caption": "two pictures of a little girl and a little boy with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280833.jpg", "caption": "a man and a woman sitting on a couch", "annotations": [{"polygon": [[49, 84], [50, 182], [17, 164], [17, 83]], "text": "EVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ZAZ", "recog_valid": false, "glyph_recog_text": "w>w", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280838.jpg", "caption": "a clock is shown on a fence in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543000.jpg", "caption": "a train on the tracks near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411934.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "annotations": [{"polygon": [[84, 380], [84, 408], [452, 404], [452, 378]], "text": "NPICTMURESK", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DPICTORES!(", "recog_valid": false, "glyph_recog_text": "NPICTMURESK", "glyph_recog_ld": 0.5454549586773103}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280879.jpg", "caption": "a yellow sign on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280883.jpg", "caption": "a bus driving down a city street with cars parked on both sides", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411954.jpg", "caption": "a traffic light and a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411962.jpg", "caption": "a living room with a fireplace, two couches and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543038.jpg", "caption": "a man in a tie and shirt with a bird on his shoulder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280898.jpg", "caption": "a bus with colorful designs on it is parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149834.jpg", "caption": "a large jet airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018766.jpg", "caption": "a bicycle with an umbrella attached to it", "annotations": [{"polygon": [[151, 374], [194, 428], [210, 426], [160, 366]], "text": "Ko", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BOU", "recog_valid": false, "glyph_recog_text": "Ko", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543058.jpg", "caption": "a motorcycle with a drum on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149842.jpg", "caption": "a man and a giraffe standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543060.jpg", "caption": "a boat is docked in a river with a crowd of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018773.jpg", "caption": "a hot dog on a bun", "annotations": [{"polygon": [[357, 369], [348, 348], [332, 337], [339, 325], [355, 337], [363, 357]], "text": "hotdog", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "notdo", "recog_valid": false, "glyph_recog_text": "hotdog", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[364, 351], [360, 337], [347, 325], [337, 324], [347, 314], [361, 322], [374, 343]], "text": "hotdog", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "otdog", "recog_valid": false, "glyph_recog_text": "hotdo", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[390, 310], [374, 295], [351, 298], [369, 275], [385, 275], [393, 281], [401, 297], [394, 311]], "text": "APF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "APF", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280926.jpg", "caption": "the contents of a backpack laid out on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412002.jpg", "caption": "a man and woman standing in front of a clock", "annotations": [{"polygon": [[256, 466], [258, 496], [301, 493], [342, 487], [342, 470]], "text": "Queen", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "QUEEN", "recog_valid": false, "glyph_recog_text": "Queen", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149862.jpg", "caption": "a green and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412009.jpg", "caption": "a piece of cake on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149871.jpg", "caption": "a fighter jet flying in the sky with its landing gear down", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018811.jpg", "caption": "a man in a suit and tie walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018816.jpg", "caption": "a woman holding skis and standing in front of a ski lift", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149892.jpg", "caption": "a bench sits on the shore of a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149895.jpg", "caption": "a building with a moon on top of it at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412040.jpg", "caption": "a giraffe standing in a field next to a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280980.jpg", "caption": "a woman in a santa costume standing in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280995.jpg", "caption": "a red bus driving down a street next to a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149924.jpg", "caption": "a bathroom with a toilet and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281021.jpg", "caption": "boats are docked in the water near houses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543166.jpg", "caption": "a parking meter is sitting on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018882.jpg", "caption": "a man on a dirt bike doing a trick", "annotations": [{"polygon": [[284, 151], [297, 140], [286, 141], [273, 131], [273, 126], [266, 121], [261, 131]], "text": "Gas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gas", "recog_valid": true, "glyph_recog_text": "Gas", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281035.jpg", "caption": "a shelf with a blender, mixer and a coffee maker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281054.jpg", "caption": "a postcard of a group of people in a boat", "annotations": [{"polygon": [[196, 154], [191, 140], [190, 124], [200, 115], [212, 110], [226, 113], [232, 121], [242, 110], [233, 103], [221, 99], [208, 98], [195, 103], [186, 111], [180, 124], [178, 138], [180, 148], [183, 158], [192, 158]], "text": "CHRIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CHRIST", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149996.jpg", "caption": "a group of people standing in a field holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412142.jpg", "caption": "a train station with a red bench and a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412146.jpg", "caption": "a man in a suit and tie sitting at a table with other people", "annotations": [{"polygon": [[247, 176], [247, 176], [329, 174], [332, 203], [243, 206]], "text": "FU-China", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLL-China", "recog_valid": false, "glyph_recog_text": "FU-China", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281080.jpg", "caption": "a silver and purple train sitting at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018958.jpg", "caption": "a pair of red skate shoes on top of a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412174.jpg", "caption": "a couple walking by a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281103.jpg", "caption": "a large passenger jet flying in the air", "annotations": [{"polygon": [[171, 183], [197, 192], [218, 220], [225, 242], [212, 252], [189, 233], [161, 198], [165, 184]], "text": "Alaska", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5110910", "recog_valid": false, "glyph_recog_text": "Alaska", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412187.jpg", "caption": "a cat laying on a shelf with a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150044.jpg", "caption": "two trains are parked next to each other on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412190.jpg", "caption": "a kitchen counter with a sink full of black appliances", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281122.jpg", "caption": "a chair with a pair of scissors on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281125.jpg", "caption": "a suitcase sitting on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018983.jpg", "caption": "a bathroom with a toilet and a wall with a red sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543291.jpg", "caption": "a window display with a television set", "annotations": [{"polygon": [[446, 277], [451, 303], [459, 301], [505, 284], [507, 275], [486, 263], [451, 270]], "text": "Barlie", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Banbie", "recog_valid": false, "glyph_recog_text": "Barlie", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019015.jpg", "caption": "a large clock is hanging from a window in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281160.jpg", "caption": "a street sign with chinese characters on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281180.jpg", "caption": "a large jetliner flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281181.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[423, 509], [229, 406], [72, 445], [280, 511]], "text": "Mist", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "3", "recog_valid": false, "glyph_recog_text": "Mist", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281191.jpg", "caption": "a clock and glasses sit on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412267.jpg", "caption": "a bathroom with a large bathtub and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281196.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281207.jpg", "caption": "a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281210.jpg", "caption": "a clock hanging on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412289.jpg", "caption": "a laptop computer sitting on a porch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150151.jpg", "caption": "a woman standing next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281224.jpg", "caption": "a table and chairs with an umbrella", "annotations": [{"polygon": [[390, 213], [403, 222], [408, 230], [418, 237], [447, 265], [439, 272], [400, 237], [381, 226]], "text": "Beazer", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Beazer", "recog_valid": true, "glyph_recog_text": "Beazer", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543371.jpg", "caption": "a building with columns and a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019085.jpg", "caption": "a white school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150161.jpg", "caption": "an old postcard shows people with umbrellas on the side of a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543392.jpg", "caption": "the swiss chalet in the alps", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281262.jpg", "caption": "a view of an airport from an airplane window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543407.jpg", "caption": "a woman riding a motorcycle down the road with a car behind her", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150192.jpg", "caption": "a man sitting at a table with three cups of coffee", "annotations": [{"polygon": [[122, 408], [102, 437], [31, 431], [62, 407]], "text": "PD", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "罗", "recog_valid": false, "glyph_recog_text": "P D", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150196.jpg", "caption": "a bus driving down a street next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281285.jpg", "caption": "a double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281288.jpg", "caption": "a white bunny shaped phone holder with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543444.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412390.jpg", "caption": "a display of bananas and other fruits in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543466.jpg", "caption": "a street sign has texts", "annotations": [{"polygon": [[64, 180], [60, 200], [68, 204], [99, 201], [101, 173], [67, 177]], "text": "OK", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "0K", "recog_valid": false, "glyph_recog_text": "OK", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543470.jpg", "caption": "a person holding a pizza on a pan", "annotations": [{"polygon": [[41, 122], [28, 129], [39, 143], [53, 152], [75, 158], [103, 157], [114, 148], [128, 133], [124, 127], [118, 122], [106, 134], [97, 142], [85, 146], [68, 142], [55, 137], [46, 127]], "text": "santangelo", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "banieangoo", "recog_valid": false, "glyph_recog_text": "santangelo", "glyph_recog_ld": 0.5000004999995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019185.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019193.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281355.jpg", "caption": "two men standing next to each other holding a stop sign", "annotations": [{"polygon": [[327, 115], [326, 144], [257, 146], [257, 114]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281358.jpg", "caption": "cows eating hay in a barn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412442.jpg", "caption": "a man holding a sign that says tattoo piercing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543517.jpg", "caption": "a us air force plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019236.jpg", "caption": "a group of men standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543541.jpg", "caption": "a display of bananas and tomatoes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412501.jpg", "caption": "a fedex airplane sitting on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543585.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[131, 20], [183, 20], [183, 50], [133, 50]], "text": "120", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "120", "recog_valid": true, "glyph_recog_text": "120", "glyph_recog_ld": 1.0}, {"polygon": [[142, 290], [167, 301], [154, 340], [130, 331]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0D", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150370.jpg", "caption": "a vintage slot machine with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150385.jpg", "caption": "a street sign on a pole with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412563.jpg", "caption": "a tv mounted on the wall in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150421.jpg", "caption": "a woman on a bus", "annotations": [{"polygon": [[224, 358], [224, 416], [319, 411], [316, 353]], "text": "FSU", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FSU", "recog_valid": true, "glyph_recog_text": "FSU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281498.jpg", "caption": "a young girl eating a slice of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412575.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019385.jpg", "caption": "two children in a small airplane on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019387.jpg", "caption": "a large building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543683.jpg", "caption": "air canada boeing 767-300 airbus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019397.jpg", "caption": "a cat sitting on a chair in a room", "annotations": [{"polygon": [[185, 391], [211, 361], [222, 369], [199, 400]], "text": "Cat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cat", "recog_valid": true, "glyph_recog_text": "Cat", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281557.jpg", "caption": "a woman sitting at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281564.jpg", "caption": "a red mesh basket with a bunch of different colored pens and scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019435.jpg", "caption": "a train traveling down the tracks in the desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543729.jpg", "caption": "a table with a wii, a computer, and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019448.jpg", "caption": "a woman is looking at a refrigerator in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281599.jpg", "caption": "a white building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412681.jpg", "caption": "a man sitting on a bed with a cat looking at him", "annotations": [{"polygon": [[286, 335], [286, 335], [291, 330], [300, 328], [307, 328], [310, 336], [318, 343], [328, 342], [346, 343], [355, 343], [364, 340], [375, 332], [385, 336], [400, 349], [401, 357], [395, 366], [393, 365], [316, 368], [326, 368], [302, 378], [291, 380]], "text": "Toronto", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Toronto", "recog_valid": true, "glyph_recog_text": "Toronto", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412691.jpg", "caption": "a snowboarder jumps over a large pile of snow balls", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150552.jpg", "caption": "a red bus driving down a street with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543769.jpg", "caption": "a man standing on a tennis court holding a racquet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019487.jpg", "caption": "a man with long hair playing tennis", "annotations": [{"polygon": [[179, 290], [196, 282], [237, 265], [237, 265], [244, 281], [224, 287], [211, 290], [206, 295], [185, 303]], "text": "KIDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KIOS", "recog_valid": false, "glyph_recog_text": "KIDS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412704.jpg", "caption": "a street sign with a street name on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281631.jpg", "caption": "a clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019496.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281650.jpg", "caption": "a baby is playing with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281652.jpg", "caption": "a truck painted in the colors of the american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543803.jpg", "caption": "a black and white photo of a baseball team", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412734.jpg", "caption": "two people sitting on a bench next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019528.jpg", "caption": "a dog looking at a christmas tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150614.jpg", "caption": "a boy is running to the plate with a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543833.jpg", "caption": "a window with a vase of flowers on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412762.jpg", "caption": "a family poses with their luggage in front of a car", "annotations": [{"polygon": [[168, 75], [168, 97], [306, 115], [304, 93], [216, 81]], "text": "SCHIPHOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SCHIPHOL", "recog_valid": true, "glyph_recog_text": "SCHIPHOL", "glyph_recog_ld": 1.0}, {"polygon": [[343, 99], [342, 118], [435, 129], [435, 112], [370, 102]], "text": "AIRPORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "AIRPORT", "recog_valid": true, "glyph_recog_text": "AIRPORT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019542.jpg", "caption": "a traffic light and a stop sign in front of wind turbines", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281700.jpg", "caption": "a refrigerator in a room with a purple wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150636.jpg", "caption": "a view of a city with a bridge and a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543855.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[201, 141], [204, 170], [254, 205], [267, 188]], "text": "FAIRVIEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FAIRVIE", "recog_valid": false, "glyph_recog_text": "FAIRVIEW", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[233, 242], [235, 266], [270, 228], [263, 201]], "text": "CRISTINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RSNA", "recog_valid": false, "glyph_recog_text": "CRISTINA", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[206, 304], [200, 350], [310, 352], [310, 309]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150669.jpg", "caption": "a clock hanging on the wall in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019622.jpg", "caption": "a snowboarder in the air doing a trick", "annotations": [{"polygon": [[198, 244], [211, 251], [188, 307], [175, 299]], "text": "FORUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WnHOd", "recog_valid": false, "glyph_recog_text": "FORUM", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543916.jpg", "caption": "a street sign with a blue and white sign", "annotations": [{"polygon": [[161, 244], [159, 280], [271, 275], [269, 239]], "text": "LEONA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEONA", "recog_valid": true, "glyph_recog_text": "LEONA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281774.jpg", "caption": "a stack of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412848.jpg", "caption": "a person sailing a small sailboat on the water", "annotations": [{"polygon": [[239, 234], [233, 248], [262, 264], [268, 251]], "text": "1394", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1394", "recog_valid": true, "glyph_recog_text": "1394", "glyph_recog_ld": 1.0}, {"polygon": [[233, 251], [227, 265], [258, 281], [264, 267], [247, 258]], "text": "PeEI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEEI", "recog_valid": false, "glyph_recog_text": "PeEl", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543922.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019635.jpg", "caption": "a glass of wine next to a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019642.jpg", "caption": "a young boy holding a baseball bat in a field", "annotations": [{"polygon": [[212, 217], [214, 248], [248, 246], [242, 219]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ia", "recog_valid": false, "glyph_recog_text": "13", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[283, 60], [254, 94], [258, 102], [286, 70]], "text": "RAPTOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ROPP", "recog_valid": false, "glyph_recog_text": "SAATAR", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281818.jpg", "caption": "a swiss train on the tracks with a mountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150746.jpg", "caption": "a cat laying on top of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019681.jpg", "caption": "a green bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150754.jpg", "caption": "a black and white photo of three people sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150773.jpg", "caption": "a woman riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019707.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019711.jpg", "caption": "a stove with a bunch of oranges on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281855.jpg", "caption": "black and white photo of people on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412929.jpg", "caption": "a cat sleeping on a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412931.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544005.jpg", "caption": "a goose is standing on the dashboard of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412947.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[229, 209], [223, 244], [254, 250], [258, 245], [262, 217]], "text": "36", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "36", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281888.jpg", "caption": "a man walking down a street with a crowd of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281891.jpg", "caption": "a cow walking in the water", "annotations": [{"polygon": [[139, 453], [188, 454], [188, 494], [141, 493]], "text": "Vlay", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Way", "recog_valid": false, "glyph_recog_text": "Vlay", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019753.jpg", "caption": "a cake with a dolphin and an octopus on it", "annotations": [{"polygon": [[244, 183], [287, 189], [277, 227], [228, 211]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[182, 211], [114, 257], [155, 295], [234, 313], [312, 299], [345, 296], [387, 282], [419, 261], [378, 227], [293, 225], [254, 247]], "text": "Amelie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "tmeve", "recog_valid": false, "glyph_recog_text": "Amelie", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019754.jpg", "caption": "a woman is doing a trick on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544059.jpg", "caption": "a man holding a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281920.jpg", "caption": "a plate of food with broccoli and chicken", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281939.jpg", "caption": "a street sign with a building in the background", "annotations": [{"polygon": [[305, 411], [302, 433], [390, 467], [383, 444]], "text": "CAMERA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CAMERA", "recog_valid": true, "glyph_recog_text": "CAMERA", "glyph_recog_ld": 1.0}, {"polygon": [[301, 383], [298, 407], [392, 443], [394, 425]], "text": "Security Camera", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SECURITY", "recog_valid": false, "glyph_recog_text": "Security Camera", "glyph_recog_ld": 0.1333339111107259}, {"polygon": [[319, 339], [320, 360], [383, 387], [383, 366]], "text": "NYPD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NYPD", "recog_valid": true, "glyph_recog_text": "NYPD", "glyph_recog_ld": 1.0}, {"polygon": [[362, 315], [365, 336], [392, 350], [392, 336]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[317, 285], [317, 300], [349, 323], [349, 305]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[283, 251], [276, 273], [312, 253], [312, 239]], "text": "Ave", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ave", "recog_valid": true, "glyph_recog_text": "Ave", "glyph_recog_ld": 1.0}, {"polygon": [[212, 292], [212, 310], [273, 276], [274, 259]], "text": "Seventh", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Seventh", "recog_valid": true, "glyph_recog_text": "Seventh", "glyph_recog_ld": 1.0}, {"polygon": [[183, 67], [181, 92], [242, 144], [245, 125]], "text": "West", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "West", "recog_valid": true, "glyph_recog_text": "West", "glyph_recog_ld": 1.0}, {"polygon": [[254, 131], [247, 152], [288, 181], [295, 166]], "text": "35th", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35th", "recog_valid": true, "glyph_recog_text": "36th", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413012.jpg", "caption": "two women dressed in costumes and talking on the phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413011.jpg", "caption": "a black and white cat laying on top of a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544089.jpg", "caption": "a vintage advertisement for the coffee house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281952.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[224, 144], [232, 139], [244, 134], [262, 130], [278, 130], [286, 131], [284, 151], [270, 149], [252, 151], [241, 155], [232, 160]], "text": "POSEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POSEY", "recog_valid": true, "glyph_recog_text": "POSEY", "glyph_recog_ld": 1.0}, {"polygon": [[216, 172], [225, 223], [308, 207], [299, 150]], "text": "28", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "28", "recog_valid": true, "glyph_recog_text": "28", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413037.jpg", "caption": "a piece of meat pie on a plate with a fork and spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281994.jpg", "caption": "a red and white truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150924.jpg", "caption": "elephants in the desert, mali, africa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019863.jpg", "caption": "a train engine is parked in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150942.jpg", "caption": "a large clock with people walking around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282015.jpg", "caption": "a person on a dirt bike in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019874.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282042.jpg", "caption": "two women walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019899.jpg", "caption": "a street sign with a no parking sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282048.jpg", "caption": "a man sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150981.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[2, 201], [117, 104], [158, 111], [0, 247]], "text": "OWDOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WDOWN", "recog_valid": false, "glyph_recog_text": ")WDOWN", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[7, 267], [57, 222], [74, 228], [20, 276]], "text": "JEAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "JEAN", "recog_valid": true, "glyph_recog_text": "JEAN", "glyph_recog_ld": 1.0}, {"polygon": [[67, 214], [110, 176], [128, 181], [86, 222]], "text": "KING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KING", "recog_valid": true, "glyph_recog_text": "KING", "glyph_recog_ld": 1.0}, {"polygon": [[121, 167], [156, 136], [173, 142], [139, 174]], "text": "CUP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CUP", "recog_valid": true, "glyph_recog_text": "CUP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282069.jpg", "caption": "a black and white photo of a baseball team", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019926.jpg", "caption": "three people sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413154.jpg", "caption": "a man in a suit and tie giving a speech", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151016.jpg", "caption": "a person cutting a cake with a knife", "annotations": [{"polygon": [[233, 307], [235, 302], [237, 302], [242, 292], [245, 292], [249, 282], [247, 282], [247, 280], [250, 280], [252, 280], [255, 273], [268, 274], [264, 278], [262, 282], [261, 285], [262, 287], [262, 289], [260, 290], [258, 291], [257, 296], [254, 298], [251, 298], [249, 303], [250, 305], [250, 307], [246, 309]], "text": "giggle", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bisgle", "recog_valid": false, "glyph_recog_text": "giggle", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[258, 265], [259, 257], [255, 257], [255, 255], [263, 255], [269, 254], [283, 229], [294, 230], [272, 265]], "text": "bloom", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oloom", "recog_valid": false, "glyph_recog_text": "bloom", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544240.jpg", "caption": "a baseball player standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282099.jpg", "caption": "a group of white cows standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151049.jpg", "caption": "a blue and purple train sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151050.jpg", "caption": "a stop sign on the side of the road", "annotations": [{"polygon": [[294, 196], [295, 230], [375, 230], [379, 195], [294, 195]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[258, 244], [260, 266], [318, 265], [324, 273], [327, 269], [324, 254], [323, 252], [306, 250], [276, 242], [258, 242], [258, 245]], "text": "Throwing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Throwing", "recog_valid": true, "glyph_recog_text": "Throwing", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019980.jpg", "caption": "a laptop computer with a television on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282142.jpg", "caption": "a little girl and a little boy looking at a horse", "annotations": [{"polygon": [[242, 388], [262, 411], [220, 450], [203, 431]], "text": "Beat", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "yoond", "recog_valid": false, "glyph_recog_text": "Beat", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020012.jpg", "caption": "a man is playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544307.jpg", "caption": "a woman hugging a large teddy bear", "annotations": [{"polygon": [[397, 77], [392, 108], [453, 116], [459, 84]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "HERE", "recog_valid": true, "glyph_recog_text": "HERE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151103.jpg", "caption": "a pug dog standing on a table with a bottle of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544325.jpg", "caption": "a baseball player standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544329.jpg", "caption": "a table with a lobster on it and a knife", "annotations": [{"polygon": [[226, 245], [226, 245], [282, 287], [265, 303], [209, 256]], "text": "CHICK-HILIDC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "D", "recog_valid": false, "glyph_recog_text": "CHCK-HILIDC", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151115.jpg", "caption": "a stop sign and a hotel sign", "annotations": [{"polygon": [[319, 85], [353, 78], [364, 296], [324, 303]], "text": "HOTEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EOF", "recog_valid": false, "glyph_recog_text": "工OFWJ", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[104, 331], [88, 394], [260, 408], [265, 347]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282208.jpg", "caption": "a person on a dirt bike doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282220.jpg", "caption": "a woman standing next to a car in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151196.jpg", "caption": "a truck with a large group of cyclists on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020133.jpg", "caption": "a table with a banana, a cell phone, and a solar charger", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282293.jpg", "caption": "a black and white photo of people walking on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151229.jpg", "caption": "a cat laying on a bed with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282310.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[131, 235], [152, 248], [139, 269], [118, 257]], "text": "MS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "中时", "recog_valid": false, "glyph_recog_text": "望", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[330, 224], [366, 196], [386, 185], [413, 225], [415, 228], [363, 262]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "35", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020172.jpg", "caption": "a train is pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151250.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020182.jpg", "caption": "a truck with a crane on it parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282329.jpg", "caption": "a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151259.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282343.jpg", "caption": "a red and yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151273.jpg", "caption": "a woman is skiing down a snowy road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020202.jpg", "caption": "a man preparing food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282357.jpg", "caption": "a man standing in a market at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544502.jpg", "caption": "a man holding a tennis racket and a tennis ball", "annotations": [{"polygon": [[383, 175], [383, 175], [367, 196], [419, 232], [435, 211]], "text": "BNR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BNP", "recog_valid": false, "glyph_recog_text": "BNR", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[446, 217], [432, 237], [510, 291], [511, 259]], "text": "PARIB", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PARIb", "recog_valid": false, "glyph_recog_text": "PARIB", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[257, 102], [249, 112], [294, 144], [302, 134]], "text": "Cincinnati", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Cincinnati", "recog_valid": true, "glyph_recog_text": "Cincinnatt", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[220, 97], [261, 125], [253, 138], [211, 108]], "text": "Shanghai", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Shanghai", "recog_valid": true, "glyph_recog_text": "Shangha", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151287.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151289.jpg", "caption": "two silver laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151300.jpg", "caption": "a man sitting on a blue bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282380.jpg", "caption": "a man riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413466.jpg", "caption": "a plane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151324.jpg", "caption": "a bump sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282415.jpg", "caption": "a man is standing next to a van with a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151345.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413496.jpg", "caption": "a fire hydrant sitting next to bags of garbage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151352.jpg", "caption": "a group of people skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544568.jpg", "caption": "a young man eating a hot dog in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282428.jpg", "caption": "a train on a city street with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151353.jpg", "caption": "a softball player swinging at a ball", "annotations": [{"polygon": [[299, 210], [293, 214], [290, 209], [289, 196], [295, 185], [317, 176], [330, 173], [338, 181], [328, 192], [322, 196], [309, 204]], "text": "Cabre", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cals", "recog_valid": false, "glyph_recog_text": "Cabre", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020281.jpg", "caption": "a clock mounted on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282440.jpg", "caption": "a blue frisbee sitting on a black box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282450.jpg", "caption": "a little girl in a car with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282464.jpg", "caption": "a clock tower in a town square with people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151396.jpg", "caption": "a baseball player running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151406.jpg", "caption": "food trucks at the fair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282527.jpg", "caption": "a dog sitting in a motorcycle seat", "annotations": [{"polygon": [[223, 401], [259, 396], [266, 390], [276, 389], [298, 391], [304, 411], [261, 422], [224, 420], [223, 410]], "text": "co-pilot", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "co-pilot", "recog_valid": true, "glyph_recog_text": "co-pilot", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151455.jpg", "caption": "a green double decker bus parked in a parking lot", "annotations": [{"polygon": [[400, 216], [402, 246], [441, 254], [440, 230], [409, 219]], "text": "Green", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Green", "recog_valid": true, "glyph_recog_text": "Green", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020398.jpg", "caption": "a yellow fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544687.jpg", "caption": "a black suitcase filled with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413623.jpg", "caption": "australian fans at the australian open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282558.jpg", "caption": "a little girl in a pink outfit is standing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282567.jpg", "caption": "a black and white photo of a train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282568.jpg", "caption": "two men jumping in the air to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020464.jpg", "caption": "an old photo of a horse drawn carriage with people standing around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151563.jpg", "caption": "a man in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413713.jpg", "caption": "a baseball game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413716.jpg", "caption": "two men cutting a cake in front of an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151577.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[50, 82], [50, 82], [107, 180], [112, 155], [58, 64]], "text": "Alexanderstrae", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "AHlexandehra", "recog_valid": false, "glyph_recog_text": "Alesxanderstrae", "glyph_recog_ld": 0.6000002666664889}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413723.jpg", "caption": "a small car parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020515.jpg", "caption": "a traffic light and a street sign in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282667.jpg", "caption": "a person cutting up some greens in a bowl", "annotations": [{"polygon": [[373, 64], [369, 85], [398, 86], [437, 90], [469, 95], [512, 107], [514, 86], [476, 80], [447, 73], [406, 67]], "text": "INTERCHANGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "INERCHANGI", "recog_valid": false, "glyph_recog_text": "INTERCHANGE", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[281, 246], [274, 269], [335, 297], [342, 273]], "text": "TAES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DES", "recog_valid": false, "glyph_recog_text": "TAES", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[384, 322], [382, 334], [404, 340], [422, 346], [437, 358], [441, 344], [419, 331]], "text": "SCHOOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SCHO", "recog_valid": false, "glyph_recog_text": "SCHOOL", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020524.jpg", "caption": "a coffee mug with a picture of flowers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544815.jpg", "caption": "a display of masks in a window", "annotations": [{"polygon": [[132, 350], [98, 366], [102, 402], [144, 387]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "13", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[119, 318], [93, 332], [100, 370], [118, 363], [131, 344], [123, 315]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "e", "recog_valid": false, "glyph_recog_text": "12", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020528.jpg", "caption": "a toy kitchen with a red kettle and a christmas tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544819.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413754.jpg", "caption": "a baseball game in progress with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282703.jpg", "caption": "a variety of supplies including glue, scissors, and glue stick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151637.jpg", "caption": "a woman holding a wii remote in her hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413782.jpg", "caption": "a bench covered in snow in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151646.jpg", "caption": "a group of elephants walking along a road", "annotations": [{"polygon": [[76, 173], [76, 202], [124, 204], [125, 173]], "text": "ING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NG", "recog_valid": false, "glyph_recog_text": "ING", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[267, 173], [267, 207], [411, 208], [411, 176]], "text": "NUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NUM", "recog_valid": true, "glyph_recog_text": "NUM", "glyph_recog_ld": 1.0}, {"polygon": [[437, 175], [437, 209], [512, 210], [513, 176]], "text": "BAIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BAII", "recog_valid": false, "glyph_recog_text": "BAIL", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151655.jpg", "caption": "two people playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282730.jpg", "caption": "a large clock tower with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020601.jpg", "caption": "a table with a sandwich and a bowl of soup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020611.jpg", "caption": "three snowboarders on a slope with flags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282766.jpg", "caption": "a large airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020644.jpg", "caption": "a baseball game with a batter at bat, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151722.jpg", "caption": "an old fashioned stove with pots and pans on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151732.jpg", "caption": "a group of people sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151756.jpg", "caption": "a young boy swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413903.jpg", "caption": "a grassy field with various items laid out on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413918.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151777.jpg", "caption": "a group of young girls playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413923.jpg", "caption": "a woman walking down the street with a bag on her back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413929.jpg", "caption": "an old black and white photo of a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413947.jpg", "caption": "a yellow and orange bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413975.jpg", "caption": "a traffic light on a street corner at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545066.jpg", "caption": "a man sitting on a toilet with a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413996.jpg", "caption": "a large blue and white ship", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282926.jpg", "caption": "a man is playing tennis on a clay court", "annotations": [{"polygon": [[298, 60], [297, 95], [377, 92], [378, 58]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IEM", "recog_valid": false, "glyph_recog_text": "IBM", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545071.jpg", "caption": "a box with a bunch of tools and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413999.jpg", "caption": "wikileaks top secret mobile phone", "annotations": [{"polygon": [[143, 150], [148, 201], [339, 221], [339, 179]], "text": "WikiLeaks", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WikiLeaks", "recog_valid": true, "glyph_recog_text": "WikiLeaks", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151859.jpg", "caption": "a man in red shirt doing a skateboard trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151869.jpg", "caption": "a microwave with a bottle of milk and a bottle of beer", "annotations": [{"polygon": [[209, 131], [214, 152], [266, 141], [255, 120]], "text": "MIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MI", "recog_valid": false, "glyph_recog_text": "MIS", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414022.jpg", "caption": "a stop sign is on a road with wind turbines in the background", "annotations": [{"polygon": [[358, 164], [380, 161], [409, 155], [412, 161], [413, 166], [403, 182], [399, 182], [359, 188], [355, 187], [354, 183], [355, 174], [356, 167]], "text": "STOOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": false, "glyph_recog_text": "STOOP", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545116.jpg", "caption": "a group of people standing on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151900.jpg", "caption": "a black bag with various electronic devices and accessories", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545124.jpg", "caption": "air canada airbus a320-2142-2cw at toronto airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020839.jpg", "caption": "a street with a traffic light and buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282982.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[148, 183], [194, 189], [185, 224], [173, 226], [157, 222], [147, 214]], "text": "32", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "32", "recog_valid": true, "glyph_recog_text": "32", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545128.jpg", "caption": "conteste in italia - italy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020857.jpg", "caption": "a plate with bananas, eggs and a bag of flour", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020860.jpg", "caption": "a teddy bear with a red shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414082.jpg", "caption": "a clock on a wall", "annotations": [{"polygon": [[356, 481], [361, 512], [448, 511], [454, 481]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[346, 335], [348, 407], [468, 406], [468, 406], [456, 339]], "text": "IR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IR", "recog_valid": true, "glyph_recog_text": "IR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545165.jpg", "caption": "a man playing tennis on a clay court", "annotations": [{"polygon": [[18, 104], [22, 133], [177, 111], [174, 87]], "text": "airberline.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "airberlin.com", "recog_valid": false, "glyph_recog_text": "airberline.com", "glyph_recog_ld": 0.9285714795918003}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414106.jpg", "caption": "a semi truck with a large flatbed trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414113.jpg", "caption": "a police officer riding a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283045.jpg", "caption": "a man holding a tennis racquet on a tennis court", "annotations": [{"polygon": [[98, 243], [97, 275], [188, 275], [187, 244]], "text": "ULL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YULL", "recog_valid": false, "glyph_recog_text": "ULL", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[330, 220], [329, 256], [425, 257], [423, 221]], "text": "QUA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QUA", "recog_valid": true, "glyph_recog_text": "QUA", "glyph_recog_ld": 1.0}, {"polygon": [[348, 270], [348, 300], [426, 303], [427, 271]], "text": "SUI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SUI", "recog_valid": true, "glyph_recog_text": "SUI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414130.jpg", "caption": "a man sitting in a living room with a laptop and a tv", "annotations": [{"polygon": [[182, 86], [182, 115], [209, 109], [233, 111], [255, 117], [257, 96], [191, 86]], "text": "RUN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BUN", "recog_valid": false, "glyph_recog_text": "RUN", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545209.jpg", "caption": "a man riding a surfboard on a wave", "annotations": [{"polygon": [[20, 23], [41, 9], [93, 20], [135, 50], [137, 92], [92, 103], [37, 86]], "text": "TLoATiNg", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "TLoATiNg", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[148, 21], [148, 21], [154, 17], [194, 24], [205, 45], [195, 79], [146, 76]], "text": "LEAT ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "国", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152020.jpg", "caption": "a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545237.jpg", "caption": "a purple sky with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152038.jpg", "caption": "an old car sits in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152041.jpg", "caption": "a train pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545257.jpg", "caption": "a man sitting on a cart with fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283122.jpg", "caption": "a pair of motorcycles in a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020983.jpg", "caption": "two men in suits standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283129.jpg", "caption": "a man is picking oranges from a crate", "annotations": [{"polygon": [[400, 417], [394, 447], [426, 452], [427, 424]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "D", "recog_valid": true, "glyph_recog_text": "D", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414202.jpg", "caption": "a model of a parking lot with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152070.jpg", "caption": "a group of remotes and calculators", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152090.jpg", "caption": "a refrigerator and a trash can in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545310.jpg", "caption": "a sign for a park in the middle of a city", "annotations": [{"polygon": [[235, 153], [232, 169], [289, 196], [292, 183], [270, 170]], "text": "Parking", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Parking", "recog_valid": true, "glyph_recog_text": "Parking", "glyph_recog_ld": 1.0}, {"polygon": [[226, 57], [224, 69], [285, 101], [281, 87]], "text": "CENTRAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CENTRAL", "recog_valid": true, "glyph_recog_text": "CENTPAL", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[287, 397], [287, 415], [355, 400], [355, 381]], "text": "BOGRASHOV", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BOGRASHOV", "recog_valid": true, "glyph_recog_text": "BOGRASHOV", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283170.jpg", "caption": "a bus and a motorcycle are driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283180.jpg", "caption": "a red train at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545334.jpg", "caption": "a baseball player throwing a pitch on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414279.jpg", "caption": "a person laying on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283208.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283220.jpg", "caption": "a street sign for floyd and linden", "annotations": [{"polygon": [[91, 255], [201, 269], [205, 300], [186, 300], [92, 291]], "text": "FLOYD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLOYD", "recog_valid": true, "glyph_recog_text": "FLOYD", "glyph_recog_ld": 1.0}, {"polygon": [[329, 239], [339, 274], [387, 203], [377, 167]], "text": "LINDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LNDEN", "recog_valid": false, "glyph_recog_text": "LINDEN", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021079.jpg", "caption": "a group of people sitting around a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021084.jpg", "caption": "a large jetliner flying low over the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545381.jpg", "caption": "a man and woman sitting at a table with a burrito", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545394.jpg", "caption": "a big bus with the words big bus london on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021136.jpg", "caption": "a bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545428.jpg", "caption": "a dog laying on a bed with papers and a stuffed animal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021151.jpg", "caption": "a beautiful woman in a leather outfit sitting on a red motorcycle", "annotations": [{"polygon": [[279, 412], [269, 476], [272, 491], [271, 496], [247, 488], [249, 471], [254, 461], [261, 459], [267, 410]], "text": "rizoma", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ewoz!!!", "recog_valid": false, "glyph_recog_text": "L . NC", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152237.jpg", "caption": "a young girl sitting on a bunk bed", "annotations": [{"polygon": [[398, 384], [426, 376], [511, 390], [511, 449], [511, 449], [455, 440], [391, 406]], "text": "JP", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "9T", "recog_valid": false, "glyph_recog_text": "JP", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283309.jpg", "caption": "a train traveling through a field of yellow flowers", "annotations": [{"polygon": [[166, 225], [166, 225], [201, 242], [206, 231], [168, 212]], "text": "SMART", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIOART", "recog_valid": false, "glyph_recog_text": "SMART", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283312.jpg", "caption": "a blue and white airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021169.jpg", "caption": "a woman holding a pair of scissors in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152247.jpg", "caption": "a train is pulling into a station with a train track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283329.jpg", "caption": "bananas are being sorted at a factory", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283333.jpg", "caption": "two women are playing a video game in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414416.jpg", "caption": "a street sign with a pedestrian crossing sign", "annotations": [{"polygon": [[205, 371], [199, 384], [203, 395], [210, 399], [311, 393], [321, 400], [333, 396], [333, 368]], "text": "Crossing", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Crossing", "recog_valid": true, "glyph_recog_text": "Crossing", "glyph_recog_ld": 1.0}, {"polygon": [[303, 48], [306, 88], [319, 92], [322, 80], [340, 82], [343, 70], [334, 57]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "P", "recog_valid": true, "glyph_recog_text": "P", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152277.jpg", "caption": "a clock with black hands and numbers", "annotations": [{"polygon": [[216, 102], [263, 110], [263, 150], [217, 139]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}, {"polygon": [[337, 192], [369, 195], [370, 237], [338, 230]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[365, 265], [391, 265], [401, 309], [365, 309]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "60", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[386, 375], [348, 372], [349, 324], [385, 327]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "寸", "recog_valid": false, "glyph_recog_text": "寸", "glyph_recog_ld": 1.0}, {"polygon": [[308, 375], [342, 375], [347, 426], [309, 426]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 1.0}, {"polygon": [[237, 426], [275, 427], [270, 386], [233, 386]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "R", "recog_valid": false, "glyph_recog_text": "6", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[162, 356], [205, 360], [187, 414], [162, 412]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "C.", "recog_valid": false, "glyph_recog_text": "卜", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[94, 285], [135, 285], [139, 342], [91, 338]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "8", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 1.0}, {"polygon": [[71, 200], [113, 206], [109, 257], [63, 249]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "9", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 1.0}, {"polygon": [[96, 143], [144, 152], [143, 197], [94, 185]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}, {"polygon": [[151, 103], [189, 113], [191, 152], [152, 142], [152, 142]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "11", "recog_valid": true, "glyph_recog_text": "11", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021222.jpg", "caption": "a row of buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414439.jpg", "caption": "a teddy bear sitting next to a clock", "annotations": [{"polygon": [[378, 284], [403, 288], [408, 258], [379, 258]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IO", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152309.jpg", "caption": "a woman in white tennis dress holding a tennis racket", "annotations": [{"polygon": [[197, 170], [197, 170], [194, 177], [203, 188], [215, 199], [221, 206], [223, 206], [225, 199], [223, 196], [215, 190], [208, 184]], "text": "3LX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": ":sx", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[126, 120], [114, 150], [143, 169], [150, 169], [155, 166], [162, 153], [160, 147], [146, 136]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021253.jpg", "caption": "a bus driving down a street with traffic", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021254.jpg", "caption": "a box of donuts", "annotations": [{"polygon": [[205, 185], [261, 177], [347, 162], [348, 144], [335, 129], [221, 147], [207, 172]], "text": "J.CO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jco", "recog_valid": false, "glyph_recog_text": "J.CO", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[171, 198], [168, 162], [114, 164], [93, 180], [107, 208]], "text": "16", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "16", "recog_valid": true, "glyph_recog_text": "16", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414471.jpg", "caption": "a woman with a red heart in her hands", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021257.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414479.jpg", "caption": "a person holding a piece of bread and spaghetti", "annotations": [{"polygon": [[16, 76], [33, 72], [44, 74], [67, 66], [110, 74], [123, 77], [121, 107], [108, 106], [95, 94], [83, 85], [55, 85], [39, 91], [37, 101], [21, 109]], "text": "BUNDABERG", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "BJENDADNG", "recog_valid": false, "glyph_recog_text": "BUNDABERG", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021269.jpg", "caption": "a stop sign on a pole", "annotations": [{"polygon": [[239, 62], [304, 63], [305, 92], [240, 92]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414492.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414499.jpg", "caption": "three men standing in the snow holding snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283456.jpg", "caption": "a cat laying on a keyboard", "annotations": [{"polygon": [[343, 399], [443, 348], [409, 328], [327, 380]], "text": "GSAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LM6SAD", "recog_valid": false, "glyph_recog_text": "GSAND", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021313.jpg", "caption": "a cat laying on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021330.jpg", "caption": "a stop sign on a street corner with a building in the background", "annotations": [{"polygon": [[189, 224], [189, 224], [185, 263], [185, 263], [198, 275], [198, 275], [306, 273], [306, 273], [319, 238], [319, 238], [311, 224], [311, 224], [233, 222], [233, 222], [195, 222], [195, 222]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152425.jpg", "caption": "a young boy swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414571.jpg", "caption": "a plate with rice, broccoli and a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152427.jpg", "caption": "a red double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152431.jpg", "caption": "a group of people sitting on a rock in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414577.jpg", "caption": "a book sitting on a desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283530.jpg", "caption": "a group of trucks parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283534.jpg", "caption": "a woman is playing tennis on a court", "annotations": [{"polygon": [[312, 21], [312, 63], [394, 70], [396, 22]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "IBM", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[265, 202], [282, 186], [296, 208], [277, 226]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[298, 178], [298, 198], [395, 210], [395, 187]], "text": "usope", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "usope or", "recog_valid": false, "glyph_recog_text": "usope", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[343, 79], [343, 121], [381, 122], [386, 104], [380, 80]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "9", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545675.jpg", "caption": "a bus driving down a street with a few buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414624.jpg", "caption": "a man and woman standing next to a car with writing on it", "annotations": [{"polygon": [[105, 253], [98, 280], [120, 291], [134, 293], [179, 273], [168, 267]], "text": "S.A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SS", "recog_valid": false, "glyph_recog_text": "S.A", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[259, 461], [244, 483], [340, 511], [357, 490]], "text": "2001", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "2300", "recog_valid": false, "glyph_recog_text": "2001", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[245, 431], [230, 451], [276, 463], [356, 475], [360, 457]], "text": "ICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "IC A", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[334, 385], [325, 398], [419, 428], [424, 406]], "text": "PRIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "peiMC", "recog_valid": false, "glyph_recog_text": "PRIME", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021417.jpg", "caption": "a man on a horse is herding cattle in the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152502.jpg", "caption": "a man laying on the snow", "annotations": [{"polygon": [[452, 249], [485, 448], [450, 448], [417, 253]], "text": "URTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "uoian", "recog_valid": false, "glyph_recog_text": "S&-oz", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152514.jpg", "caption": "a stuffed teddy bear with a note attached to it", "annotations": [{"polygon": [[170, 368], [165, 355], [210, 326], [215, 338]], "text": "myluck", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PYIUOK", "recog_valid": false, "glyph_recog_text": "myluck", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[116, 400], [165, 371], [168, 387], [121, 414]], "text": "TeDDy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Z2DDY", "recog_valid": false, "glyph_recog_text": "TeDDy", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[168, 369], [201, 350], [205, 367], [171, 385]], "text": "Bear", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bewr", "recog_valid": false, "glyph_recog_text": "Bear", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[176, 505], [186, 513], [196, 513], [234, 492], [223, 482], [192, 493]], "text": "Tanika", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Taoiba", "recog_valid": false, "glyph_recog_text": "Tanika", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[175, 481], [174, 491], [226, 467], [231, 452], [210, 460]], "text": "number 1", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Diu22be", "recog_valid": false, "glyph_recog_text": "numbert", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[184, 426], [186, 441], [220, 423], [215, 410]], "text": "have", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "2a1e", "recog_valid": false, "glyph_recog_text": "have", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[196, 403], [197, 417], [228, 397], [225, 381]], "text": "want", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "a1/", "recog_valid": false, "glyph_recog_text": "want", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[144, 424], [147, 439], [175, 426], [172, 408]], "text": "sand", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "and", "recog_valid": false, "glyph_recog_text": "sand", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[180, 385], [182, 403], [227, 375], [225, 365], [204, 372]], "text": "Beat Jackser ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Iackscn", "recog_valid": false, "glyph_recog_text": "Bauar-jedee", "glyph_recog_ld": 0.0909099173546205}, {"polygon": [[123, 414], [121, 430], [179, 402], [179, 386]], "text": "Micheal", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Miehea/", "recog_valid": false, "glyph_recog_text": "Micheal", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[133, 350], [133, 363], [183, 335], [184, 320], [157, 332]], "text": "Jackson", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sacbsoi", "recog_valid": false, "glyph_recog_text": "Jackson", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[122, 331], [125, 355], [179, 316], [174, 304]], "text": "Jackson", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Micheal", "recog_valid": false, "glyph_recog_text": "Jackson", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283620.jpg", "caption": "icon ai-1, ai-1a, ai-1b, ai-1c, ai-1d, ai", "annotations": [{"polygon": [[45, 98], [45, 119], [138, 134], [137, 117]], "text": "ICON", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "icon", "recog_valid": false, "glyph_recog_text": "ICON", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283624.jpg", "caption": "a woman and her dog sitting at a table with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283650.jpg", "caption": "a woman holding skis and standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152578.jpg", "caption": "a man and three children sitting around a birthday cake", "annotations": [{"polygon": [[132, 402], [150, 410], [175, 399], [200, 392], [182, 380], [131, 401]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[134, 418], [151, 429], [228, 395], [238, 400], [239, 395], [218, 386], [194, 391], [134, 417]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "sHhian", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[172, 424], [190, 435], [229, 417], [236, 411], [244, 415], [246, 412], [224, 401], [172, 423]], "text": "Daddy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Dao84", "recog_valid": false, "glyph_recog_text": "Daddy", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414723.jpg", "caption": "a bathroom mirror with a sign on it", "annotations": [{"polygon": [[367, 37], [304, 74], [273, 420], [329, 376], [345, 214]], "text": "BUSCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H520", "recog_valid": false, "glyph_recog_text": "m一奶口工", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152588.jpg", "caption": "a man in blue shirt catching a dog in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021528.jpg", "caption": "a man and a woman riding bicycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021534.jpg", "caption": "a man standing at a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283678.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414764.jpg", "caption": "a large green truck with a container on the back", "annotations": [{"polygon": [[105, 224], [107, 248], [303, 234], [301, 199]], "text": "U A S C", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UAS C", "recog_valid": false, "glyph_recog_text": "UASC", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[512, 359], [433, 388], [348, 377], [430, 353]], "text": "NMSR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "6NMSR", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[315, 385], [240, 407], [240, 410], [305, 431], [318, 431], [387, 406], [396, 400]], "text": "A46", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "A 4 6N", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[271, 380], [175, 411], [125, 390], [212, 367]], "text": "A4CE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "A4G", "recog_valid": false, "glyph_recog_text": "A4CE", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283697.jpg", "caption": "a desk with a laptop, a magazine, a mouse, a pen, a book, a pen holder, a cell phone, a wallet, a cell phone", "annotations": [{"polygon": [[345, 90], [347, 126], [396, 126], [467, 126], [464, 93], [403, 98], [370, 91]], "text": "Macworld", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Macworld", "recog_valid": true, "glyph_recog_text": "Macworld", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545844.jpg", "caption": "a pizza with mussels and other seafood on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545849.jpg", "caption": "a coffee cup sitting on a desk next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545850.jpg", "caption": "a red double decker bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152633.jpg", "caption": "a young boy in a soccer uniform is kicking the ball", "annotations": [{"polygon": [[112, 82], [109, 116], [220, 121], [223, 91]], "text": "Sai", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Sanitg", "recog_valid": false, "glyph_recog_text": "Sai", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021569.jpg", "caption": "a truck driving down a road near a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414787.jpg", "caption": "a bathroom with a toilet, sink and shower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545859.jpg", "caption": "a man driving a horse drawn carriage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152677.jpg", "caption": "a bed with a map on it and a book on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414822.jpg", "caption": "a group of people standing around a luggage carousel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152689.jpg", "caption": "a blue bus driving down the street", "annotations": [{"polygon": [[217, 332], [217, 332], [241, 295], [235, 294], [211, 329]], "text": "cia", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eUn", "recog_valid": false, "glyph_recog_text": "..", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414838.jpg", "caption": "a woman sitting at a table with a cake with lit candles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414851.jpg", "caption": "a bus driving down the street with a car behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021632.jpg", "caption": "two people dressed up as dogs near a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152711.jpg", "caption": "a police officer stands next to a truck on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152725.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152746.jpg", "caption": "a man is adjusting a tie on another man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283818.jpg", "caption": "a cat is standing on top of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021674.jpg", "caption": "a man in a suit and hat is singing into a microphone", "annotations": [{"polygon": [[476, 335], [466, 347], [499, 371], [505, 358]], "text": "BLUES", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BLUES", "recog_valid": true, "glyph_recog_text": "BLLES", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414916.jpg", "caption": "a group of people are cutting up a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021717.jpg", "caption": "a man wearing a birthday hat and a woman wearing a birthday hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283867.jpg", "caption": "two trains are on the tracks next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283870.jpg", "caption": "a man riding a wave on a surfboard", "annotations": [{"polygon": [[136, 135], [138, 105], [240, 110], [240, 137]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "heegraply", "recog_valid": false, "glyph_recog_text": "Photograph", "glyph_recog_ld": 0.40000059999939996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546018.jpg", "caption": "a table with a banana and a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414947.jpg", "caption": "a person holding a wii remote in their hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414954.jpg", "caption": "a woman in a yellow shirt and white skirt holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283884.jpg", "caption": "a train traveling through the mountains with a mountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021747.jpg", "caption": "a young boy eating a piece of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546046.jpg", "caption": "three double decker buses parked in a row", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283913.jpg", "caption": "a truck parked in front of a mcdonalds restaurant", "annotations": [{"polygon": [[261, 240], [261, 256], [223, 274], [223, 259]], "text": "McDonald's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "07", "recog_valid": false, "glyph_recog_text": "MaDonsds", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414999.jpg", "caption": "a baseball game in progress with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546078.jpg", "caption": "a woman and a child standing in front of a purple double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546085.jpg", "caption": "a black and green motorcycle parked on the side of the road", "annotations": [{"polygon": [[377, 232], [370, 245], [392, 256], [399, 264], [411, 253]], "text": "250", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "250", "recog_valid": true, "glyph_recog_text": "250", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283956.jpg", "caption": "a slice of pizza on a plate with a fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283960.jpg", "caption": "a group of people cutting a cake at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283961.jpg", "caption": "a street with cars parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415042.jpg", "caption": "a model airplane on a desk", "annotations": [{"polygon": [[178, 164], [192, 198], [274, 181], [262, 153]], "text": "KINGFISHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YDPGPSE", "recog_valid": false, "glyph_recog_text": "KINGFISHER", "glyph_recog_ld": 0.3000006999993}, {"polygon": [[260, 347], [312, 290], [318, 306], [262, 366]], "text": "KINGFISHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KINGFISHER", "recog_valid": true, "glyph_recog_text": "KINGFISHER", "glyph_recog_ld": 1.0}, {"polygon": [[273, 356], [275, 360], [309, 322], [306, 317]], "text": "AIRLINES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AZTIINI", "recog_valid": false, "glyph_recog_text": "trleelre", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[102, 436], [103, 445], [152, 470], [150, 458]], "text": "KINGFISHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Nagosaa", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283985.jpg", "caption": "a baseball player in a striped uniform swinging a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152913.jpg", "caption": "a stop sign and a statue of a lion and elephant", "annotations": [{"polygon": [[167, 241], [227, 246], [228, 271], [168, 269], [167, 242]], "text": "Park", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Park", "recog_valid": true, "glyph_recog_text": "Park", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415067.jpg", "caption": "soccer players are jumping to block the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546151.jpg", "caption": "a bus parked on a street with people sitting on the side", "annotations": [{"polygon": [[222, 222], [224, 244], [278, 222], [287, 218], [282, 188]], "text": "Mendo3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sMendeg", "recog_valid": false, "glyph_recog_text": "Mendo3", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284012.jpg", "caption": "a baseball game with a batter and catcher in the batter's box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284018.jpg", "caption": "cars driving down a city street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152958.jpg", "caption": "a man standing on the corner of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415109.jpg", "caption": "a man in white jacket holding a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152992.jpg", "caption": "a shower stall with a toilet and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153008.jpg", "caption": "british airways plane takes off from london's london airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153031.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153039.jpg", "caption": "a group of people in green shirts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021972.jpg", "caption": "a united airlines airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415190.jpg", "caption": "virgin airlines new york city airport", "annotations": [{"polygon": [[28, 74], [28, 106], [110, 105], [111, 74]], "text": "NET", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "NET", "recog_valid": true, "glyph_recog_text": "NET", "glyph_recog_ld": 1.0}, {"polygon": [[16, 278], [20, 304], [53, 305], [48, 287], [31, 273]], "text": "Virgin", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "vep", "recog_valid": false, "glyph_recog_text": "Y", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153052.jpg", "caption": "a baseball game with a crowd of people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021981.jpg", "caption": "a scooter parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021983.jpg", "caption": "a group of men playing a game of rugby", "annotations": [{"polygon": [[95, 213], [95, 213], [104, 212], [126, 215], [130, 218], [127, 224], [118, 250], [104, 251], [82, 249]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "7", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284127.jpg", "caption": "a pair of headphones and a pair of scissors on a table", "annotations": [{"polygon": [[196, 173], [202, 170], [229, 199], [223, 201]], "text": "BOSE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "4型吉", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153064.jpg", "caption": "a kitchen with a sink, stove and refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415222.jpg", "caption": "a plate of food on a table", "annotations": [{"polygon": [[261, 194], [222, 254], [346, 300], [395, 253], [400, 242], [337, 223]], "text": "EAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EAT", "recog_valid": true, "glyph_recog_text": "EAT", "glyph_recog_ld": 1.0}, {"polygon": [[221, 258], [216, 267], [203, 306], [280, 334], [333, 348], [354, 302]], "text": "THIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THIS", "recog_valid": true, "glyph_recog_text": "THIS", "glyph_recog_ld": 1.0}, {"polygon": [[312, 333], [298, 352], [355, 373], [370, 348]], "text": "2011", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2011", "recog_valid": true, "glyph_recog_text": "2011", "glyph_recog_ld": 1.0}, {"polygon": [[376, 210], [370, 227], [424, 242], [434, 226]], "text": "Weight", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Weight", "recog_valid": true, "glyph_recog_text": "Waight", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[302, 422], [288, 471], [317, 481], [416, 481], [441, 456], [443, 447]], "text": "THAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "THAT", "recog_valid": true, "glyph_recog_text": "THAT", "glyph_recog_ld": 1.0}, {"polygon": [[196, 420], [189, 431], [273, 459], [280, 449]], "text": "ZINCZENKO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ZINCZENKO", "recog_valid": true, "glyph_recog_text": "ZINCZENKO", "glyph_recog_ld": 1.0}, {"polygon": [[326, 379], [305, 419], [454, 450], [500, 382], [371, 359]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "NOT", "recog_valid": true, "glyph_recog_text": "NOT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284150.jpg", "caption": "two boys holding tennis rackets", "annotations": [{"polygon": [[294, 250], [295, 273], [302, 272], [321, 267], [326, 254], [328, 247], [324, 241], [309, 243]], "text": "Jump", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Jump", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153080.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[217, 176], [222, 168], [247, 199], [240, 204]], "text": "GUERRERO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HERRERI", "recog_valid": false, "glyph_recog_text": "gyeysoal", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415243.jpg", "caption": "a man is playing tennis on a street", "annotations": [{"polygon": [[211, 265], [212, 275], [227, 278], [242, 244], [221, 241]], "text": "44", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "士", "recog_valid": false, "glyph_recog_text": "yp", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022032.jpg", "caption": "two cars parked in a parking lot with a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153105.jpg", "caption": "a large clock tower with a large clock on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153123.jpg", "caption": "a man wearing a vest and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415275.jpg", "caption": "a display of vegetables and other items in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415277.jpg", "caption": "a snowboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546352.jpg", "caption": "a skateboarder doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284218.jpg", "caption": "a table with various desserts and cookies", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022080.jpg", "caption": "a donut and a coffee on a table", "annotations": [{"polygon": [[227, 80], [236, 86], [246, 91], [246, 94], [242, 97], [247, 104], [260, 111], [261, 114], [260, 118], [259, 119], [260, 122], [257, 123], [235, 110], [229, 102], [230, 89], [225, 84]], "text": "Tim", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Tim", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[273, 98], [267, 111], [265, 111], [264, 112], [266, 115], [263, 121], [268, 126], [279, 129], [295, 129], [306, 129], [328, 128], [341, 123], [350, 121], [358, 116], [362, 111], [362, 108], [362, 105], [364, 101], [362, 99], [357, 106], [343, 109], [339, 105], [326, 102], [322, 103], [320, 109], [315, 109], [310, 111], [307, 109], [305, 114], [300, 115], [288, 112], [293, 105], [289, 101]], "text": "Hortous", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Hoateuy", "recog_valid": false, "glyph_recog_text": "Hortous", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022086.jpg", "caption": "two children laying in bed with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022102.jpg", "caption": "a man sitting at a table with two pizzas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415322.jpg", "caption": "a machine that is filled with doughnuts", "annotations": [{"polygon": [[238, 363], [292, 385], [288, 395], [235, 372]], "text": "Robot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WARK", "recog_valid": false, "glyph_recog_text": "Robet", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284256.jpg", "caption": "a train station with a large window and a large sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153186.jpg", "caption": "a cow standing in a pen at a fair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284259.jpg", "caption": "a plate of food with rice and vegetables", "annotations": [{"polygon": [[38, 206], [34, 221], [28, 239], [23, 258], [10, 256], [3, 250], [12, 214], [17, 198]], "text": "CocaCola", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2eg.oog", "recog_valid": false, "glyph_recog_text": "(0O3", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[454, 379], [389, 428], [377, 400], [438, 357]], "text": "Coca-Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2e200g", "recog_valid": false, "glyph_recog_text": "Coca-Cola", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153190.jpg", "caption": "a person on skis jumping over a small ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153202.jpg", "caption": "a doll with red hair sitting next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022145.jpg", "caption": "a plate of meat and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022150.jpg", "caption": "a laptop on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153224.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153235.jpg", "caption": "an old photo of a plane flying over mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284307.jpg", "caption": "a man in a suit and tie waving a pink balloon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153259.jpg", "caption": "a teddy bear wearing a life jacket holding a cupcake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284333.jpg", "caption": "a large jet airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153262.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284336.jpg", "caption": "a motel sign with a red light and a sign that says olympic inn", "annotations": [{"polygon": [[136, 239], [134, 259], [139, 276], [157, 271], [165, 275], [191, 270], [193, 274], [231, 273], [232, 233], [191, 234], [156, 238]], "text": "OLYMPIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OLYMPIC", "recog_valid": true, "glyph_recog_text": "OLYMPIC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284338.jpg", "caption": "a woman in a polka dot dress sitting on a ladder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284342.jpg", "caption": "a blue and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284351.jpg", "caption": "a sign on a gate that says h5", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415427.jpg", "caption": "a bus on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022218.jpg", "caption": "a large jet airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022222.jpg", "caption": "a couple of people standing on a street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022224.jpg", "caption": "a train traveling down the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022230.jpg", "caption": "a clock on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546518.jpg", "caption": "a young man is hitting a tennis ball with his racket", "annotations": [{"polygon": [[210, 128], [216, 156], [315, 152], [314, 123]], "text": "WINDSOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINDSOR", "recog_valid": true, "glyph_recog_text": "WINDSOR", "glyph_recog_ld": 1.0}, {"polygon": [[347, 125], [354, 154], [448, 148], [447, 120], [373, 122]], "text": "VISITORS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "VISITORS", "recog_valid": true, "glyph_recog_text": "VISITORS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022229.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546538.jpg", "caption": "a man holding a tennis racket and a tennis ball", "annotations": [{"polygon": [[328, 128], [366, 85], [485, 89], [492, 99], [484, 114], [453, 119], [449, 130]], "text": "ATP", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "", "recog_valid": false, "glyph_recog_text": "ATP", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415475.jpg", "caption": "a parking meter on the side of a sidewalk", "annotations": [{"polygon": [[187, 133], [192, 137], [196, 130], [203, 122], [209, 118], [219, 114], [227, 111], [230, 110], [229, 102], [223, 104], [219, 105], [208, 110], [198, 118], [194, 122]], "text": "DENVER'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DENVER'S", "recog_valid": true, "glyph_recog_text": "DENVERS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[191, 468], [191, 481], [283, 500], [285, 488]], "text": "HOMELESSNESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HCMELESSNESS", "recog_valid": false, "glyph_recog_text": "HOMELESSNESS", "glyph_recog_ld": 0.9166667361110532}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153340.jpg", "caption": "three motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022270.jpg", "caption": "the tools needed to make a bicycle wheel", "annotations": [{"polygon": [[433, 222], [433, 203], [483, 191], [483, 208]], "text": "Abil N", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AbiIN", "recog_valid": false, "glyph_recog_text": "AbilN", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[451, 279], [452, 257], [485, 249], [485, 269]], "text": "Abn", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Abi", "recog_valid": false, "glyph_recog_text": "Abn", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[399, 277], [394, 297], [428, 284], [429, 263]], "text": "Abii", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "15", "recog_valid": false, "glyph_recog_text": "Abij", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022275.jpg", "caption": "a woman sitting at a table with a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022291.jpg", "caption": "a poster advertising the epic tennis tournament", "annotations": [{"polygon": [[341, 208], [342, 260], [480, 261], [480, 209]], "text": "EPIC.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EPIC", "recog_valid": false, "glyph_recog_text": "EPIC.", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415516.jpg", "caption": "a blue car parked on the beach under a cloudy sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284461.jpg", "caption": "a man flying a kite in the sky", "annotations": [{"polygon": [[451, 345], [472, 395], [514, 371], [513, 332], [492, 340], [482, 332]], "text": "Do", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Dr", "recog_valid": false, "glyph_recog_text": "Do", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546631.jpg", "caption": "a plate with meat, potatoes, and corn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284498.jpg", "caption": "a man is holding a tennis racket and a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022367.jpg", "caption": "a young girl playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022374.jpg", "caption": "a black cat sitting on a laptop", "annotations": [{"polygon": [[421, 407], [418, 414], [477, 441], [482, 431]], "text": "from", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "completely", "recog_valid": false, "glyph_recog_text": "!:om", "glyph_recog_ld": 0.1000008999991}, {"polygon": [[383, 272], [371, 311], [423, 340], [490, 370], [512, 375], [512, 325], [496, 324]], "text": "MARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FOO1", "recog_valid": false, "glyph_recog_text": "MARK", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[378, 321], [375, 334], [445, 371], [512, 398], [512, 386]], "text": "MARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MARK", "recog_valid": true, "glyph_recog_text": "MARK", "glyph_recog_ld": 1.0}, {"polygon": [[362, 343], [359, 351], [455, 403], [512, 426], [512, 418], [468, 402]], "text": "MARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "WWWWHOLEFOODSMARI", "recog_valid": false, "glyph_recog_text": "MARK", "glyph_recog_ld": 0.1764710726640749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415604.jpg", "caption": "a cat sitting in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153462.jpg", "caption": "a train is coming down the tracks with smoke coming out of the engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153466.jpg", "caption": "a man holding a hammer over a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415611.jpg", "caption": "a small airplane on top of a trailer", "annotations": [{"polygon": [[164, 301], [164, 311], [178, 302], [183, 294], [184, 284], [175, 277], [162, 276], [154, 281], [149, 289], [150, 298], [153, 301], [159, 298], [157, 292], [157, 289], [164, 285], [170, 285], [175, 286], [176, 289]], "text": "AEROBATIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "学", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153475.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153481.jpg", "caption": "a train engine is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415629.jpg", "caption": "a rusted old truck", "annotations": [{"polygon": [[8, 207], [177, 217], [187, 240], [197, 330], [34, 371]], "text": "WATER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "T", "recog_valid": false, "glyph_recog_text": "WATER", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[211, 217], [290, 223], [294, 303], [211, 325], [208, 308], [203, 221]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "佛一路山", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[295, 227], [302, 218], [315, 213], [332, 218], [337, 234], [335, 245], [305, 268]], "text": "more", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "十", "recog_valid": false, "glyph_recog_text": "mofs", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[307, 263], [323, 256], [341, 237], [346, 277], [342, 284], [333, 301], [309, 314]], "text": "more Shaps!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "fe -。", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546703.jpg", "caption": "people are standing in a field with kites", "annotations": [{"polygon": [[14, 389], [8, 414], [140, 420], [137, 395]], "text": "Hardly Anyone", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Har8ly Anyone", "recog_valid": false, "glyph_recog_text": "Hardly Anyone", "glyph_recog_ld": 0.9230769822484752}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022429.jpg", "caption": "a birthday cake with a train on top", "annotations": [{"polygon": [[190, 291], [195, 300], [207, 300], [224, 304], [235, 304], [256, 305], [283, 304], [296, 304], [317, 303], [335, 298], [333, 293], [328, 276], [323, 277], [308, 278], [284, 277], [270, 277], [259, 263], [248, 263], [234, 266], [224, 266], [210, 268]], "text": "Alex", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hlex", "recog_valid": false, "glyph_recog_text": "Alex", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153507.jpg", "caption": "a black and white photo of a small airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284583.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415659.jpg", "caption": "a street sign that says begin one way street", "annotations": [{"polygon": [[212, 220], [265, 201], [267, 178], [215, 199]], "text": "BEGIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BEGIN", "recog_valid": true, "glyph_recog_text": "BEGIN", "glyph_recog_ld": 1.0}, {"polygon": [[216, 224], [214, 247], [257, 233], [258, 210]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[211, 255], [212, 276], [250, 266], [257, 239]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[199, 288], [197, 310], [262, 295], [266, 268]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022448.jpg", "caption": "a vase with colorful flowers in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022451.jpg", "caption": "a cat laying on a person's lap", "annotations": [{"polygon": [[224, 394], [230, 377], [239, 364], [247, 352], [249, 347], [241, 337], [231, 352], [228, 360], [221, 369], [219, 373], [214, 378]], "text": "Banana", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Banang", "recog_valid": false, "glyph_recog_text": "Banaria", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[225, 396], [235, 413], [237, 404], [248, 378], [248, 378], [251, 381], [263, 370], [261, 364], [267, 355], [272, 351], [265, 341], [260, 343], [252, 342], [251, 348], [226, 396]], "text": "Pineapple", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tineapple", "recog_valid": false, "glyph_recog_text": "Pineapple", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546739.jpg", "caption": "a truck carrying a large number of tractors on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022456.jpg", "caption": "a green and white sign on the front of a building", "annotations": [{"polygon": [[83, 212], [301, 225], [306, 271], [171, 263], [149, 276], [132, 262], [78, 256]], "text": "Calzoberia", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cagoteeiia", "recog_valid": false, "glyph_recog_text": "Calzoberia", "glyph_recog_ld": 0.5000004999995}, {"polygon": [[305, 247], [332, 222], [362, 232], [428, 243], [432, 269], [355, 268], [349, 288], [309, 269]], "text": "ligure", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ciiece", "recog_valid": false, "glyph_recog_text": "ligure", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153530.jpg", "caption": "a blue train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546757.jpg", "caption": "a man with tattoos on his back talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153542.jpg", "caption": "a desk with a laptop and a desktop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022482.jpg", "caption": "a series of pictures of a baseball player pitching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022484.jpg", "caption": "a market with lots of vegetables and fruits", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022500.jpg", "caption": "a busy street with many signs and signs", "annotations": [{"polygon": [[444, 319], [448, 362], [475, 360], [476, 317], [443, 316]], "text": "ELEVEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153577.jpg", "caption": "a plate with waffles and eggs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153591.jpg", "caption": "a person holding a glass of wine with a logo on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284667.jpg", "caption": "a man on a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153616.jpg", "caption": "a man sitting at a table with a pizza and a glass of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153634.jpg", "caption": "a jockey is riding a horse in a race", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153638.jpg", "caption": "a laptop computer with a magazine on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022582.jpg", "caption": "a street sign with a traffic light and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284732.jpg", "caption": "a group of children sitting around a birthday cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022593.jpg", "caption": "a man eating a slice of pizza while sitting at a computer", "annotations": [{"polygon": [[360, 83], [364, 97], [438, 49], [412, 49]], "text": "CAUTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "CAETIOO", "recog_valid": false, "glyph_recog_text": "CAUTION", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[292, 133], [295, 143], [348, 109], [346, 94]], "text": "CAUTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PYTION", "recog_valid": false, "glyph_recog_text": "CAUTION", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022624.jpg", "caption": "a man on skis is going down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546918.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[225, 120], [251, 108], [256, 139], [229, 150]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "nu", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[199, 201], [199, 258], [308, 276], [309, 221]], "text": "ZESAESKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6300", "recog_valid": false, "glyph_recog_text": "ZESAESKI", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[212, 180], [213, 197], [252, 202], [277, 209], [285, 215], [298, 222], [299, 207], [290, 198], [270, 190]], "text": "2ESAESKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2ESAESK", "recog_valid": false, "glyph_recog_text": "2ESAESK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153710.jpg", "caption": "a teddy bear sitting on a pile of garbage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546937.jpg", "caption": "two people standing next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022650.jpg", "caption": "a man with a beard standing in a kitchen", "annotations": [{"polygon": [[161, 371], [161, 377], [166, 381], [171, 379], [176, 376], [181, 372], [185, 370], [187, 370], [192, 368], [198, 366], [206, 366], [214, 366], [221, 367], [227, 368], [234, 372], [239, 375], [244, 378], [249, 370], [248, 366], [243, 363], [236, 359], [234, 358], [231, 356], [224, 353], [217, 353], [209, 352], [203, 351], [195, 353], [189, 354], [183, 356], [177, 358], [170, 362], [164, 367]], "text": "QUIKSILVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OOIKSILISA", "recog_valid": false, "glyph_recog_text": "QUIKSILVER", "glyph_recog_ld": 0.5000004999995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022651.jpg", "caption": "three glasses of wine", "annotations": [{"polygon": [[468, 373], [476, 360], [508, 383], [499, 396]], "text": "NE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "NE", "recog_valid": true, "glyph_recog_text": "NE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546940.jpg", "caption": "a group of people sitting under an umbrella on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415871.jpg", "caption": "a piece of cake with a sign on it", "annotations": [{"polygon": [[137, 341], [137, 341], [143, 337], [155, 335], [159, 337], [169, 342], [185, 334], [229, 326], [240, 328], [234, 341], [193, 350], [158, 359], [150, 359], [145, 356]], "text": "Beetroot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Rootrool", "recog_valid": false, "glyph_recog_text": "Beetroot", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153730.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284814.jpg", "caption": "two skis are laying on the ground next to a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284824.jpg", "caption": "a young boy riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415901.jpg", "caption": "a clock hanging from a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415912.jpg", "caption": "a man is kite surfing in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022710.jpg", "caption": "a sign on a pole that says florida", "annotations": [{"polygon": [[134, 107], [135, 129], [214, 113], [208, 91]], "text": "FLORIDA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FLORIDA", "recog_valid": true, "glyph_recog_text": "FLORIDA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284867.jpg", "caption": "a desk with a laptop and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284888.jpg", "caption": "a woman riding a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153817.jpg", "caption": "a steam train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547036.jpg", "caption": "a teddy bear sitting on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547044.jpg", "caption": "a group of men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153829.jpg", "caption": "a man riding a skateboard down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547043.jpg", "caption": "a man in red and white shirt and red shorts is jumping up to hit the ball", "annotations": [{"polygon": [[151, 469], [151, 458], [159, 441], [167, 441], [169, 453], [177, 441], [181, 440], [182, 455], [194, 455], [206, 455], [212, 457], [208, 465], [203, 464], [200, 467], [194, 465], [188, 465], [184, 470], [180, 471], [166, 467], [157, 469], [151, 469]], "text": "Max ", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Man", "recog_valid": false, "glyph_recog_text": "Max", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284911.jpg", "caption": "a man brushing his teeth in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284932.jpg", "caption": "a person riding a snowboard down a hill", "annotations": [{"polygon": [[37, 411], [38, 359], [99, 343], [101, 403]], "text": "X", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "X", "recog_valid": true, "glyph_recog_text": "x", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153861.jpg", "caption": "two double decker buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547089.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[129, 368], [149, 334], [188, 333], [214, 327], [260, 327], [311, 349], [324, 352], [329, 362], [324, 401], [306, 408], [274, 384], [252, 388], [218, 377], [201, 376], [178, 372], [136, 371]], "text": "ride", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rids", "recog_valid": false, "glyph_recog_text": "ride", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416018.jpg", "caption": "two people playing wii games on a large screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284950.jpg", "caption": "a man riding a motorcycle with a teddy bear on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547097.jpg", "caption": "a stop sign is on a fence in a field", "annotations": [{"polygon": [[87, 305], [198, 319], [203, 264], [90, 250], [86, 305]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284956.jpg", "caption": "a group of children standing around a birthday cake", "annotations": [{"polygon": [[112, 340], [153, 347], [147, 397], [116, 394]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ce", "recog_valid": false, "glyph_recog_text": "to", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153888.jpg", "caption": "a fighter jet sitting on the tarmac with people standing around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284979.jpg", "caption": "a clock on a post on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153907.jpg", "caption": "a refrigerator sitting on a brick sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022837.jpg", "caption": "a pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153921.jpg", "caption": "a soccer game with players on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416076.jpg", "caption": "three people posing for a photo on a ski slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153938.jpg", "caption": "a woman sitting at a table with a pizza", "annotations": [{"polygon": [[198, 272], [201, 267], [204, 265], [208, 265], [212, 261], [217, 257], [221, 257], [227, 258], [232, 263], [233, 268], [231, 274], [225, 278], [222, 281], [238, 277], [243, 275], [255, 274], [258, 276], [259, 278], [262, 275], [267, 273], [277, 271], [288, 269], [287, 272], [289, 275], [289, 277], [289, 281], [283, 285], [278, 288], [265, 290], [259, 290], [251, 291], [245, 292], [243, 296], [240, 300], [234, 302], [228, 303], [224, 302], [219, 301], [216, 298], [211, 302], [204, 306], [197, 306], [190, 304], [186, 298], [183, 292], [182, 286], [185, 281], [187, 279], [192, 279], [194, 281], [196, 281], [196, 278], [197, 275]], "text": "Supez", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S1", "recog_valid": false, "glyph_recog_text": "Supea", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[260, 249], [299, 249], [300, 264], [298, 280], [297, 283], [290, 284], [286, 282], [260, 268]], "text": "79", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "19", "recog_valid": false, "glyph_recog_text": "79", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285019.jpg", "caption": "a motorcycle with a stop sign in the rear view mirror", "annotations": [{"polygon": [[74, 283], [111, 264], [116, 278], [78, 298]], "text": "HUSOVAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Husoienn", "recog_valid": false, "glyph_recog_text": "shusreinr", "glyph_recog_ld": 0.333334074073251}, {"polygon": [[388, 288], [341, 304], [344, 319], [391, 305]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "90T2", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547164.jpg", "caption": "a fire hydrant in front of a building", "annotations": [{"polygon": [[209, 227], [208, 243], [261, 229], [261, 212]], "text": "Lincoln", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "bmoh", "recog_valid": false, "glyph_recog_text": "Lincoin", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[269, 208], [267, 227], [336, 209], [333, 190]], "text": "Theatre", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "theabre", "recog_valid": false, "glyph_recog_text": "Theatre", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416095.jpg", "caption": "a large building with a clock and people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022880.jpg", "caption": "a cuckoo clock with bears and birds on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416111.jpg", "caption": "a man sitting at a table with a laptop", "annotations": [{"polygon": [[326, 312], [326, 327], [408, 348], [410, 337]], "text": "carcassonne", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Carcasstpne", "recog_valid": false, "glyph_recog_text": "CBSSCMte", "glyph_recog_ld": 0.2727279338836964}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285041.jpg", "caption": "a person laying on a bench with a backpack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285042.jpg", "caption": "a green train is parked at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416119.jpg", "caption": "a man and woman in a yellow airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285064.jpg", "caption": "two women playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153997.jpg", "caption": "a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022926.jpg", "caption": "a boat with an umbrella on it", "annotations": [{"polygon": [[294, 226], [323, 237], [314, 259], [285, 246]], "text": "PS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2器", "recog_valid": false, "glyph_recog_text": "PS", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022940.jpg", "caption": "a person riding a dirt bike on a muddy track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416165.jpg", "caption": "a large clock tower with a christmas tree in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547247.jpg", "caption": "a cross country skier in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022964.jpg", "caption": "a woman in a blue dress is playing tennis", "annotations": [{"polygon": [[56, 112], [307, 105], [312, 197], [36, 194]], "text": "AEGO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AECOL", "recog_valid": false, "glyph_recog_text": "AEGO", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[81, 238], [288, 230], [292, 252], [81, 264]], "text": "INTERNATIONAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "INTERNA", "recog_valid": false, "glyph_recog_text": "INTERNATIONAL", "glyph_recog_ld": 0.5384618934908512}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154037.jpg", "caption": "a woman is preparing a drink in a blender", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416186.jpg", "caption": "a double decker bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022987.jpg", "caption": "a woman with face paint on her face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154060.jpg", "caption": "a red fire hydrant in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416202.jpg", "caption": "a woman in a blue shirt and black skirt is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285131.jpg", "caption": "a red arrow pointing to a street sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547280.jpg", "caption": "a street with traffic lights and a mcdonalds", "annotations": [{"polygon": [[450, 223], [448, 261], [498, 260], [491, 218]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285138.jpg", "caption": "a man holding a skateboard with a skull and crossbones on it", "annotations": [{"polygon": [[179, 223], [238, 218], [239, 193], [178, 202], [178, 202]], "text": "DEATH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEATH", "recog_valid": true, "glyph_recog_text": "DEATH", "glyph_recog_ld": 1.0}, {"polygon": [[266, 372], [268, 407], [322, 411], [336, 386]], "text": "TREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TREIET", "recog_valid": false, "glyph_recog_text": "TREET", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[258, 311], [262, 339], [320, 365], [331, 363], [342, 348], [321, 332], [286, 316]], "text": "TRAUSS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRAUSS", "recog_valid": true, "glyph_recog_text": "TRAUSS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154073.jpg", "caption": "two men on the podium with one pointing to the other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023004.jpg", "caption": "two jockeys are racing on the back of their horses", "annotations": [{"polygon": [[256, 192], [263, 231], [244, 234], [224, 227], [242, 194]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023015.jpg", "caption": "a tennis player is about to serve the ball", "annotations": [{"polygon": [[93, 272], [91, 289], [126, 261], [124, 251]], "text": "ACURA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ALZRA", "recog_valid": false, "glyph_recog_text": "ACURA", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154096.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547315.jpg", "caption": "a table with three glasses of beer and a laptop", "annotations": [{"polygon": [[0, 277], [5, 294], [80, 268], [80, 254]], "text": "SUNNES", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GIINNES", "recog_valid": false, "glyph_recog_text": "SUNNES", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416247.jpg", "caption": "a man sitting at a desk using a laptop computer", "annotations": [{"polygon": [[169, 324], [191, 329], [195, 336], [198, 362], [204, 376], [212, 390], [222, 415], [226, 426], [221, 426], [197, 417], [190, 404], [184, 391], [177, 370]], "text": "AKEFRONT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "INU83XU", "recog_valid": false, "glyph_recog_text": "AKEFRONT", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416248.jpg", "caption": "a woman and a child sitting on a motorcycle", "annotations": [{"polygon": [[319, 369], [304, 380], [378, 405], [392, 396]], "text": "ZZR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "P7P", "recog_valid": false, "glyph_recog_text": "ZZR", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416246.jpg", "caption": "a stop sign on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023050.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[180, 164], [179, 145], [191, 131], [220, 135], [266, 131], [260, 162], [220, 170]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q8", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[181, 171], [192, 204], [226, 201], [247, 182], [253, 163]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": true, "glyph_recog_text": "E", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285195.jpg", "caption": "a bus with a flower design on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285194.jpg", "caption": "a man dressed in a banana costume", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154123.jpg", "caption": "a small room with a desk, a bed, and a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023072.jpg", "caption": "a woman holding an umbrella with the words live and let live written on it", "annotations": [{"polygon": [[156, 417], [121, 442], [147, 498], [188, 476]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "79", "recog_valid": false, "glyph_recog_text": "5", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285217.jpg", "caption": "two men holding a large teddy bear", "annotations": [{"polygon": [[45, 196], [86, 166], [86, 193], [45, 217]], "text": "MEMBERSHIP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EE济", "recog_valid": false, "glyph_recog_text": "ME2C36P", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416297.jpg", "caption": "a plate with a sandwich and a cup of coffee", "annotations": [{"polygon": [[35, 335], [27, 250], [35, 246], [54, 247], [61, 252], [65, 269], [55, 269], [46, 270], [46, 270], [49, 289], [57, 283], [57, 283], [57, 283], [63, 283], [63, 289], [54, 296], [51, 305], [51, 316], [55, 332], [55, 332], [44, 341]], "text": "andreu", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ctneieu", "recog_valid": false, "glyph_recog_text": "NCo.", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023096.jpg", "caption": "a parking meter on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416322.jpg", "caption": "a mouse and keyboard on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023134.jpg", "caption": "a woman sitting in a chair holding a box of kleenex", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023140.jpg", "caption": "a baseball player holding a bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416357.jpg", "caption": "a man holding a tennis racket on a court", "annotations": [{"polygon": [[175, 277], [177, 327], [503, 349], [504, 303]], "text": "SPORTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAPORTS", "recog_valid": false, "glyph_recog_text": "SPORTS", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285296.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023158.jpg", "caption": "a pizza and fries on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547449.jpg", "caption": "a green and white vw bus with two people in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416387.jpg", "caption": "a hospital room with a clock and medical equipment", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547465.jpg", "caption": "a stop sign with stickers on it", "annotations": [{"polygon": [[231, 148], [244, 184], [283, 182], [296, 146]], "text": "VT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VT", "recog_valid": true, "glyph_recog_text": "VT", "glyph_recog_ld": 1.0}, {"polygon": [[111, 196], [111, 329], [406, 326], [418, 196]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[243, 387], [253, 400], [310, 356], [297, 344], [242, 386]], "text": "Steamboat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Steamiboat", "recog_valid": false, "glyph_recog_text": "Steamboat", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023203.jpg", "caption": "an old yellow bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547528.jpg", "caption": "a street sign with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547533.jpg", "caption": "a woman on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285397.jpg", "caption": "miniature donuts on a plate with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154329.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285405.jpg", "caption": "a group of colorful boxes sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023287.jpg", "caption": "a man holding a frisbee on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416510.jpg", "caption": "polo team riding horses in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154399.jpg", "caption": "a green and white airplane", "annotations": [{"polygon": [[280, 262], [299, 245], [349, 254], [334, 278]], "text": "Aer Lingus", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aerlingus", "recog_valid": false, "glyph_recog_text": "Aer Lingus", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285477.jpg", "caption": "a stack of books on a bed", "annotations": [{"polygon": [[418, 158], [372, 187], [347, 181], [397, 155]], "text": "SURFACES", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OOH", "recog_valid": false, "glyph_recog_text": "SURFACES", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[448, 172], [382, 214], [336, 207], [412, 170]], "text": "POLYMER", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "R", "recog_valid": false, "glyph_recog_text": "POLYMER", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285488.jpg", "caption": "a table with several different types of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285493.jpg", "caption": "a yellow pedestrian crossing sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416576.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023361.jpg", "caption": "a traffic light is on the corner of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416589.jpg", "caption": "a blurry image of people in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285526.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547675.jpg", "caption": "a train on the tracks with a crossing sign", "annotations": [{"polygon": [[1, 148], [0, 175], [65, 99], [54, 86]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": ".ROSSING", "recog_valid": false, "glyph_recog_text": "OAD", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[33, 142], [42, 133], [69, 162], [61, 173]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416612.jpg", "caption": "a man in a suit and tie cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285548.jpg", "caption": "a desk with a computer, a phone, and a plant", "annotations": [{"polygon": [[167, 371], [152, 380], [79, 350], [95, 342]], "text": "HA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "4C", "recog_valid": false, "glyph_recog_text": "HA", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547693.jpg", "caption": "three men playing a video game", "annotations": [{"polygon": [[5, 223], [-1, 240], [37, 255], [41, 241]], "text": "SANTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SANTA", "recog_valid": true, "glyph_recog_text": "SANTA", "glyph_recog_ld": 1.0}, {"polygon": [[46, 240], [44, 258], [83, 271], [83, 257]], "text": "CATALINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CATALINA", "recog_valid": true, "glyph_recog_text": "CATALAE", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[0, 244], [-2, 337], [60, 347], [75, 306], [79, 272]], "text": "89", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "39", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285550.jpg", "caption": "a boat is being lifted by a crane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023415.jpg", "caption": "a black and white photo of a man sleeping in a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547703.jpg", "caption": "a woman is holding a dog in the air", "annotations": [{"polygon": [[208, 9], [208, 46], [282, 46], [278, 10]], "text": "FALL.", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FAIL", "recog_valid": false, "glyph_recog_text": "FALL.", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[87, 461], [94, 497], [161, 497], [161, 460]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "YOU", "recog_valid": true, "glyph_recog_text": "YOU", "glyph_recog_ld": 1.0}, {"polygon": [[170, 460], [169, 497], [232, 498], [229, 459]], "text": "DID", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DID", "recog_valid": true, "glyph_recog_text": "DID", "glyph_recog_ld": 1.0}, {"polygon": [[241, 462], [241, 497], [272, 498], [278, 459]], "text": "IT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "IT", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[284, 460], [284, 497], [389, 496], [393, 459]], "text": "RIGHT.", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "RIGHT", "recog_valid": false, "glyph_recog_text": "RIGHT.", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285563.jpg", "caption": "a man holding an umbrella and holding a sign", "annotations": [{"polygon": [[95, 248], [160, 257], [158, 263], [150, 283], [100, 275], [94, 273]], "text": "ADCTPORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NCTPORT", "recog_valid": false, "glyph_recog_text": "ADCTPORT", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[162, 257], [191, 262], [187, 287], [185, 290], [155, 284]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5E3", "recog_valid": false, "glyph_recog_text": "6", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[173, 298], [216, 307], [211, 328], [204, 337], [177, 333], [165, 328], [167, 319]], "text": "TANH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AMH", "recog_valid": false, "glyph_recog_text": "TANH", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[427, 312], [511, 316], [512, 387], [438, 381]], "text": "NP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "nE", "recog_valid": false, "glyph_recog_text": "NP", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023420.jpg", "caption": "a baseball player is swinging his bat at a ball", "annotations": [{"polygon": [[300, 298], [350, 291], [354, 362], [305, 368]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "食", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416648.jpg", "caption": "a man sitting at a table with a basket of food", "annotations": [{"polygon": [[0, 207], [0, 236], [22, 234], [42, 230], [68, 225], [146, 210], [144, 181], [55, 196], [45, 198]], "text": "edtouch", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "edtouch", "recog_valid": true, "glyph_recog_text": "edtouch", "glyph_recog_ld": 1.0}, {"polygon": [[56, 237], [58, 259], [96, 249], [145, 237], [142, 211]], "text": "media", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "nedia", "recog_valid": false, "glyph_recog_text": "media", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416653.jpg", "caption": "a group of people standing around a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416651.jpg", "caption": "a group of men in black and yellow uniforms", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547730.jpg", "caption": "a man in red shirt and black shorts is about to hit a tennis ball", "annotations": [{"polygon": [[290, 89], [291, 124], [343, 123], [352, 125], [366, 123], [375, 124], [390, 123], [412, 122], [438, 122], [467, 122], [496, 120], [513, 120], [513, 84], [493, 82], [442, 82], [408, 81], [370, 80], [345, 81]], "text": "MELBOURNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MELBOURNE", "recog_valid": true, "glyph_recog_text": "MELBOURNE", "glyph_recog_ld": 1.0}, {"polygon": [[230, 222], [231, 248], [306, 249], [300, 219]], "text": "KIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KIA", "recog_valid": true, "glyph_recog_text": "KIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154518.jpg", "caption": "a person holding a gps device in their hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285603.jpg", "caption": "a highway with cars and trucks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285618.jpg", "caption": "a large chocolate cow statue in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023480.jpg", "caption": "a man on a tennis court with a racket", "annotations": [{"polygon": [[123, 160], [117, 187], [64, 176], [64, 157]], "text": "urope1", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "urope1", "recog_valid": true, "glyph_recog_text": "urope1", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547774.jpg", "caption": "a group of people standing near a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023487.jpg", "caption": "an old black and white photo of a street with horse drawn carriages", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023501.jpg", "caption": "a red fire hydrant sitting in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285649.jpg", "caption": "a truck driving down the road", "annotations": [{"polygon": [[204, 152], [204, 152], [227, 128], [257, 131], [264, 149], [265, 183], [205, 198]], "text": "Koch", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hoch", "recog_valid": false, "glyph_recog_text": "s", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547794.jpg", "caption": "a man and a woman are holding a beer and talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416723.jpg", "caption": "a group of people sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154600.jpg", "caption": "three men in the water holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547827.jpg", "caption": "a man sleeping in bed with a quote", "annotations": [{"polygon": [[15, 288], [17, 319], [81, 318], [79, 292]], "text": "Please", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Pleaee", "recog_valid": false, "glyph_recog_text": "Please", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[83, 284], [84, 312], [125, 324], [126, 297], [114, 285]], "text": "help", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "help", "recog_valid": true, "glyph_recog_text": "help", "glyph_recog_ld": 1.0}, {"polygon": [[56, 319], [56, 345], [93, 349], [96, 320]], "text": "lost", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "losh", "recog_valid": false, "glyph_recog_text": "lost", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[146, 402], [142, 434], [157, 435], [161, 435], [164, 424], [190, 426], [190, 397], [162, 402]], "text": "s just", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ioot", "recog_valid": false, "glyph_recog_text": "s jus", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[60, 406], [56, 428], [92, 427], [96, 428], [96, 436], [106, 440], [117, 431], [114, 405]], "text": "BATLERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "balleny", "recog_valid": false, "glyph_recog_text": "BATLERY", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023538.jpg", "caption": "a plate of food with fries and a side of coleslaw", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416766.jpg", "caption": "a yellow dump truck parked in a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285710.jpg", "caption": "a large body of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285717.jpg", "caption": "a street sign with a blue arrow pointing to cosmic", "annotations": [{"polygon": [[52, 238], [61, 304], [264, 287], [248, 217]], "text": "COSMIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COSMIC", "recog_valid": true, "glyph_recog_text": "COSMIC", "glyph_recog_ld": 1.0}, {"polygon": [[288, 236], [331, 231], [339, 269], [292, 271]], "text": "LN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LN", "recog_valid": true, "glyph_recog_text": "LN", "glyph_recog_ld": 1.0}, {"polygon": [[187, 191], [224, 109], [246, 164], [219, 209], [194, 214]], "text": "CENTR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CENTR", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416810.jpg", "caption": "a girl swinging a tennis racket at a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154710.jpg", "caption": "a jockey rides a horse on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154713.jpg", "caption": "a plate with three hot dogs on it", "annotations": [{"polygon": [[83, 372], [99, 375], [106, 373], [130, 378], [142, 381], [154, 383], [143, 360], [128, 357], [110, 353], [92, 346]], "text": "Boo", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Boo", "recog_valid": true, "glyph_recog_text": "Boo", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023648.jpg", "caption": "a street sign on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416869.jpg", "caption": "a black and white photo of a large airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154727.jpg", "caption": "a southwest airlines plane on the runway", "annotations": [{"polygon": [[96, 176], [104, 169], [150, 245], [139, 252]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SOUTHWEST", "recog_valid": true, "glyph_recog_text": "SOUTHWEST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416875.jpg", "caption": "a man cutting a cake in a box", "annotations": [{"polygon": [[223, 422], [202, 430], [187, 406], [200, 400], [220, 416]], "text": "Gopal", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "redolg", "recog_valid": false, "glyph_recog_text": "Copa", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285810.jpg", "caption": "a man walking down a street in a black and white photo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285817.jpg", "caption": "a display case filled with various types of pastries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154752.jpg", "caption": "a man walking down a street with an umbrella", "annotations": [{"polygon": [[217, 100], [215, 118], [246, 146], [248, 135]], "text": "QUIZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "QEIZ", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154755.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285841.jpg", "caption": "an old black and white photo of a ship in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000547985.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548010.jpg", "caption": "people are waiting in line at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023759.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000416997.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[358, 253], [399, 253], [401, 304], [359, 304]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285958.jpg", "caption": "a dog sitting on the floor in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417036.jpg", "caption": "a train decorated with christmas lights and santa claus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000285963.jpg", "caption": "a group of men on horses with flags in the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154898.jpg", "caption": "a statue of a man holding a baseball bat and a child", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286009.jpg", "caption": "a dog laying on a bed with a book", "annotations": [{"polygon": [[386, 92], [410, 90], [419, 118], [395, 121]], "text": "T.C.", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "T.C", "recog_valid": false, "glyph_recog_text": "T.C.", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[400, 134], [425, 132], [450, 199], [424, 204]], "text": "BOYLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BOYLE", "recog_valid": true, "glyph_recog_text": "BOYLE", "glyph_recog_ld": 1.0}, {"polygon": [[324, 105], [330, 198], [375, 199], [346, 103]], "text": "HUMAN FLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HUMAN FLY", "recog_valid": true, "glyph_recog_text": "工与器<名业", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286013.jpg", "caption": "a young boy wearing a hat and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417090.jpg", "caption": "monster mash at the park", "annotations": [{"polygon": [[9, 217], [5, 229], [5, 247], [156, 247], [157, 240], [151, 217]], "text": "MONSTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MOnStEr", "recog_valid": false, "glyph_recog_text": "MONSTER", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[180, 217], [169, 243], [175, 246], [225, 247], [252, 243], [254, 222], [241, 217]], "text": "MASH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M4SH", "recog_valid": false, "glyph_recog_text": "MASH", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548171.jpg", "caption": "a city street at night with many neon signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548177.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[300, 173], [362, 190], [344, 250], [289, 233]], "text": "55", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "55", "recog_valid": true, "glyph_recog_text": "55", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417105.jpg", "caption": "a sink with a toothbrush and toothpaste", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548183.jpg", "caption": "a truck carrying a large crane on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000154972.jpg", "caption": "a large passenger jet on the runway at an airport", "annotations": [{"polygon": [[178, 262], [192, 290], [238, 289], [219, 259]], "text": "vueling", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "weing", "recog_valid": false, "glyph_recog_text": "vueling", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023906.jpg", "caption": "a pan with food on it and a glass of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286051.jpg", "caption": "a group of people waiting at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417129.jpg", "caption": "a street with a street sign and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000023914.jpg", "caption": "a man sitting on a couch with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417134.jpg", "caption": "a person holding a flip phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417153.jpg", "caption": "a laptop with stickers on it sitting on a desk", "annotations": [{"polygon": [[276, 191], [290, 218], [296, 221], [324, 248], [329, 245], [325, 229], [321, 214], [316, 204], [290, 172]], "text": "Rookies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Rookien", "recog_valid": false, "glyph_recog_text": "Rookies", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[212, 224], [208, 234], [272, 275], [276, 266]], "text": "UNDERGROUND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNDERGROUND", "recog_valid": true, "glyph_recog_text": "JNDERGROUNO", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[133, 259], [126, 276], [149, 293], [151, 300], [157, 298], [173, 310], [178, 301], [161, 286], [137, 269], [140, 263]], "text": "foursquare", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "foursquare", "recog_valid": true, "glyph_recog_text": "foursquare", "glyph_recog_ld": 1.0}, {"polygon": [[304, 168], [304, 179], [312, 180], [320, 185], [325, 192], [328, 200], [329, 209], [329, 218], [338, 218], [338, 206], [336, 193], [331, 183], [326, 176], [321, 172], [312, 168]], "text": "COFFEESHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANFFEESHOP", "recog_valid": false, "glyph_recog_text": "DOPFEESHOA", "glyph_recog_ld": 0.6000003999996}, {"polygon": [[282, 209], [273, 206], [273, 218], [274, 227], [276, 235], [280, 242], [287, 250], [296, 256], [307, 257], [317, 255], [314, 242], [305, 245], [294, 242], [287, 233], [283, 224], [282, 214]], "text": "AMSTERDAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VZSTEDN", "recog_valid": false, "glyph_recog_text": "AINSTERDAM", "glyph_recog_ld": 0.40000059999939996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417160.jpg", "caption": "a man standing in front of a table with wine bottles", "annotations": [{"polygon": [[240, 302], [242, 318], [309, 295], [304, 279]], "text": "PRAMID", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PRAMII", "recog_valid": false, "glyph_recog_text": "PRAMID", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417171.jpg", "caption": "a kitchen with green cabinets and a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286101.jpg", "caption": "a large airplane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286120.jpg", "caption": "flickr is a photo sharing website that allows users to upload, edit, and share photos and videos", "annotations": [{"polygon": [[156, 143], [266, 140], [267, 172], [157, 174]], "text": "FlicKr", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "flickr", "recog_valid": false, "glyph_recog_text": "FlicKr", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417204.jpg", "caption": "a man is standing on the side of a boat", "annotations": [{"polygon": [[295, 344], [287, 375], [365, 398], [462, 423], [475, 412], [481, 395], [482, 374], [480, 364], [461, 359], [405, 352], [369, 346], [325, 340], [302, 338]], "text": "25686", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "25686", "recog_valid": true, "glyph_recog_text": "25686", "glyph_recog_ld": 1.0}, {"polygon": [[180, 312], [170, 335], [145, 312], [155, 294]], "text": "GUARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CUARO", "recog_valid": false, "glyph_recog_text": "CUARD", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155061.jpg", "caption": "a man standing next to a train on display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286137.jpg", "caption": "a group of people standing near a food truck", "annotations": [{"polygon": [[320, 158], [320, 194], [368, 195], [368, 160]], "text": "MAKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAKI", "recog_valid": true, "glyph_recog_text": "MAKI", "glyph_recog_ld": 1.0}, {"polygon": [[319, 204], [321, 240], [367, 240], [366, 203]], "text": "TACO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZACO", "recog_valid": false, "glyph_recog_text": "TACO", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286144.jpg", "caption": "a church with a sign that says tessell park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155076.jpg", "caption": "a clock on a pole in front of a store", "annotations": [{"polygon": [[289, 60], [297, 71], [308, 65], [319, 56], [331, 53], [343, 49], [354, 50], [372, 54], [382, 59], [387, 41], [380, 36], [363, 35], [349, 32], [322, 39], [304, 48]], "text": "KATAYAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "KATAYAONA", "recog_valid": false, "glyph_recog_text": "KATAYAMA", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286151.jpg", "caption": "a man and woman riding a cart down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417230.jpg", "caption": "two police officers riding horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286162.jpg", "caption": "a bed with two pillows and a blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024020.jpg", "caption": "a small plane with a maple leaf on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024019.jpg", "caption": "a clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286166.jpg", "caption": "a group of jockeys racing horses on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155097.jpg", "caption": "a street with a lot of cars and a lot of buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024026.jpg", "caption": "a man taking a picture of a pizza", "annotations": [{"polygon": [[448, 242], [387, 246], [387, 284], [447, 270]], "text": "89", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "gnu lt", "recog_valid": false, "glyph_recog_text": "89", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548318.jpg", "caption": "a baby is watching tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286181.jpg", "caption": "a street sign with a neon sign on it", "annotations": [{"polygon": [[158, 275], [133, 290], [89, 271], [24, 272], [43, 236], [78, 234], [128, 243]], "text": "Juicebar", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ehuicebur", "recog_valid": false, "glyph_recog_text": "Juicebar", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[31, 263], [31, 263], [35, 304], [23, 309], [-1, 307], [-3, 282]], "text": "ch", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "o.", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155138.jpg", "caption": "a woman holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024072.jpg", "caption": "a kitten being fed by a bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548362.jpg", "caption": "a couple of kids eating at a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024076.jpg", "caption": "three paintings of scissors on easels in an art studio", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286227.jpg", "caption": "a man and a dog are walking in a field with the golden gate bridge in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417303.jpg", "caption": "a group of people walking on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286233.jpg", "caption": "a traffic light with a sign that says family zone", "annotations": [{"polygon": [[239, -5], [254, 21], [157, 190], [137, 162]], "text": "UNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "UNG", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[181, -1], [192, -1], [209, 10], [129, 134], [107, 112]], "text": "UNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "422@", "recog_valid": false, "glyph_recog_text": "UNG", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[212, 224], [209, 231], [246, 256], [249, 248]], "text": "UNIVERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNIVET", "recog_valid": false, "glyph_recog_text": "wcvtRn", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[94, 264], [81, 283], [149, 313], [222, 365], [230, 336], [169, 298]], "text": "FAMILY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FAMILYABO", "recog_valid": false, "glyph_recog_text": "FAMILY", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[215, 288], [206, 297], [230, 319], [237, 310]], "text": "7 am", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7AM", "recog_valid": false, "glyph_recog_text": "7am", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024091.jpg", "caption": "a car with bicycles on the roof", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155167.jpg", "caption": "a street sign with a columbus street sign and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155170.jpg", "caption": "a police officer riding a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155172.jpg", "caption": "a hot dog and french fries in a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548393.jpg", "caption": "a box of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155198.jpg", "caption": "a woman in a pink hat and purple dress holding a pink umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548418.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[284, 297], [276, 323], [307, 330], [312, 306]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "15", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155219.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286292.jpg", "caption": "a cow walking down the street in front of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024149.jpg", "caption": "a laptop with a glass of wine on the table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548452.jpg", "caption": "a woman laying in bed reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286328.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024193.jpg", "caption": "a train traveling through a field with black smoke", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155268.jpg", "caption": "a woman sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286376.jpg", "caption": "a group of skateboards with designs on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155306.jpg", "caption": "a white bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286382.jpg", "caption": "a group of people in formal wear standing around a kitchen", "annotations": [{"polygon": [[28, 414], [-1, 430], [1, 444], [32, 424], [31, 416]], "text": "Biscuit", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "scuit", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548537.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548544.jpg", "caption": "a man walking on the beach with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024257.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286406.jpg", "caption": "a soccer player in mid air catching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548559.jpg", "caption": "a train station with many trains parked on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548575.jpg", "caption": "a cake with a train on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024296.jpg", "caption": "a baseball player running on a field", "annotations": [{"polygon": [[215, 93], [225, 141], [415, 136], [412, 89]], "text": "WHEEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WHEEL", "recog_valid": true, "glyph_recog_text": "WHEEL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286442.jpg", "caption": "a table with a cell phone, wallet, keys, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548597.jpg", "caption": "a train engine is parked in front of a yellow building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417528.jpg", "caption": "a blue dump truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417556.jpg", "caption": "a man on a motorcycle", "annotations": [{"polygon": [[149, 67], [148, 82], [208, 102], [209, 89]], "text": "2067676653", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "20676753", "recog_valid": false, "glyph_recog_text": "2087676653", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155417.jpg", "caption": "a soldier is holding a hose while standing next to a large military plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024352.jpg", "caption": "a woman in a purple dress is playing tennis", "annotations": [{"polygon": [[219, 124], [220, 164], [412, 165], [411, 124]], "text": "CHASE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHASE", "recog_valid": true, "glyph_recog_text": "CHASE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548652.jpg", "caption": "a neon sign on a building at night", "annotations": [{"polygon": [[320, 84], [316, 113], [364, 114], [366, 84]], "text": "EMN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MEW", "recog_valid": false, "glyph_recog_text": "EMN", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286519.jpg", "caption": "a man laying in bed with two laptops", "annotations": [{"polygon": [[387, 229], [389, 258], [297, 261], [277, 236]], "text": "VAIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VA1O", "recog_valid": false, "glyph_recog_text": "VAIO", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417595.jpg", "caption": "a baseball player standing on a field holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417602.jpg", "caption": "a fruit stand with many different types of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548689.jpg", "caption": "a person holding a cell phone with a message on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548690.jpg", "caption": "a sign on the side of a road in the desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286550.jpg", "caption": "three people sitting at a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548702.jpg", "caption": "steve jobs quote", "annotations": [{"polygon": [[364, 208], [364, 239], [439, 238], [442, 215]], "text": "hungry", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "hungry", "recog_valid": true, "glyph_recog_text": "hungry", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286564.jpg", "caption": "a large black plane is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155496.jpg", "caption": "a computer monitor with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155509.jpg", "caption": "a man on skis standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548731.jpg", "caption": "a stop sign with a traffic sign and a construction sign", "annotations": [{"polygon": [[71, 139], [71, 139], [72, 178], [166, 180], [167, 141]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155519.jpg", "caption": "a group of children in a bathroom mirror brushing their teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286611.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[229, 224], [327, 211], [332, 240], [235, 249]], "text": "joyyges", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Voy ges", "recog_valid": false, "glyph_recog_text": "joyyges", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[368, 207], [442, 199], [445, 225], [370, 231]], "text": "Ecker", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ecker", "recog_valid": true, "glyph_recog_text": "Ecker", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286613.jpg", "caption": "a sign that says no parking on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417689.jpg", "caption": "a black and white photo of a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548768.jpg", "caption": "a statue of a cow with a blow dryer on its head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417701.jpg", "caption": "graffiti on a train car with a purple and green design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024489.jpg", "caption": "a double decker bus is parked next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024497.jpg", "caption": "a cat sitting on top of a truck", "annotations": [{"polygon": [[291, 296], [339, 268], [364, 320], [319, 347]], "text": "SE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SE", "recog_valid": true, "glyph_recog_text": "留", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[362, 253], [424, 215], [450, 266], [388, 304]], "text": "145", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "145", "recog_valid": true, "glyph_recog_text": "145", "glyph_recog_ld": 1.0}, {"polygon": [[449, 200], [492, 175], [513, 228], [474, 253]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "ST", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[412, 310], [451, 319], [462, 368], [430, 359]], "text": "56", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "5 6", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[469, 323], [497, 329], [513, 379], [488, 373]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[378, 324], [385, 351], [412, 352], [393, 317]], "text": "SE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "岁", "recog_valid": false, "glyph_recog_text": "SE", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155582.jpg", "caption": "a stop sign with the word stop written on it", "annotations": [{"polygon": [[1, 248], [278, 345], [382, 371], [495, 267], [496, 199], [85, 62], [38, 63], [-2, 190]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417741.jpg", "caption": "a group of people standing in front of a green vehicle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286673.jpg", "caption": "a pizza on a glass table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548822.jpg", "caption": "a man in red shirt kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286688.jpg", "caption": "1910 ford model t fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286692.jpg", "caption": "a bed with white sheets and pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024547.jpg", "caption": "a pizza in a box with a slice missing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286698.jpg", "caption": "a man and a woman playing frisbee in the snow", "annotations": [{"polygon": [[448, 303], [446, 369], [412, 369], [412, 301]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286704.jpg", "caption": "people standing in front of a building with a parking sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024571.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155652.jpg", "caption": "a city street with bicycles parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024582.jpg", "caption": "a large clock tower with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417802.jpg", "caption": "a black cat sitting on a desk with a computer", "annotations": [{"polygon": [[19, 382], [18, 432], [95, 432], [97, 384]], "text": "GET", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "GET", "recog_valid": true, "glyph_recog_text": "GET", "glyph_recog_ld": 1.0}, {"polygon": [[105, 383], [109, 431], [222, 431], [222, 384]], "text": "WELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WELL", "recog_valid": true, "glyph_recog_text": "WELL", "glyph_recog_ld": 1.0}, {"polygon": [[235, 382], [233, 431], [357, 431], [356, 385]], "text": "SOON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SOON", "recog_valid": true, "glyph_recog_text": "SOON", "glyph_recog_ld": 1.0}, {"polygon": [[369, 382], [370, 431], [443, 431], [443, 384]], "text": "PLZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "PLZ", "recog_valid": true, "glyph_recog_text": "PLZ", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024600.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417822.jpg", "caption": "a red and white airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417823.jpg", "caption": "great egret and black skimmer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417832.jpg", "caption": "a woman holding three teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024621.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000548935.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417870.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417885.jpg", "caption": "a white kitchen with a white cabinet and a white refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155741.jpg", "caption": "a pizza with a fork on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000417891.jpg", "caption": "two green double decker buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155751.jpg", "caption": "a large white and yellow airplane parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024699.jpg", "caption": "a stop sign and telephone pole", "annotations": [{"polygon": [[469, 182], [471, 183], [512, 183], [512, 213], [465, 213]], "text": "STO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STO", "recog_valid": true, "glyph_recog_text": "STO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155775.jpg", "caption": "a man on skis standing on a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024714.jpg", "caption": "a woman sitting on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549003.jpg", "caption": "a woman is drinking a cup of coffee at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024716.jpg", "caption": "a woman and a baby are posing for a picture with a large pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155790.jpg", "caption": "photo of mets vs braves game-used baseball", "annotations": [{"polygon": [[97, 220], [124, 236], [54, 343], [30, 335]], "text": "Wilson", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "No 7eca", "recog_valid": false, "glyph_recog_text": "Wilson", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549017.jpg", "caption": "a baseball player is at home plate ready to swing", "annotations": [{"polygon": [[147, 187], [154, 217], [186, 211], [183, 183]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286877.jpg", "caption": "a train car with empty seats and windows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286897.jpg", "caption": "a yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024782.jpg", "caption": "a double decker bus with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000286930.jpg", "caption": "a group of baseball players standing around a pitcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024787.jpg", "caption": "a refrigerator with magnets on the door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024794.jpg", "caption": "a man and a woman are looking at their cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024815.jpg", "caption": "a cell phone and a makeup palette on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155894.jpg", "caption": "a man walking with a stuffed animal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024823.jpg", "caption": "a kitchen with a microwave and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549122.jpg", "caption": "a black and white photo of a street sign", "annotations": [{"polygon": [[212, 119], [213, 126], [273, 84], [261, 83]], "text": "DARLING", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DARLING", "recog_valid": true, "glyph_recog_text": "uaniisG", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[221, 129], [219, 146], [266, 117], [265, 102]], "text": "Kings", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Kings", "recog_valid": true, "glyph_recog_text": "Kengs", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[215, 157], [214, 168], [306, 110], [305, 99]], "text": "Kings", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST VINCENTS", "recog_valid": false, "glyph_recog_text": "A i t t t", "glyph_recog_ld": 0.0909099173546205}, {"polygon": [[217, 172], [217, 183], [299, 134], [298, 123]], "text": "EMERGENCY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EMERGENCY", "recog_valid": true, "glyph_recog_text": "车餐季A作车N0Y", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418054.jpg", "caption": "a small airplane is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549133.jpg", "caption": "a plate with scones and jam on it", "annotations": [{"polygon": [[135, 160], [134, 169], [140, 175], [159, 184], [183, 191], [187, 183], [161, 176], [142, 167]], "text": "jam", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rouloenn", "recog_valid": false, "glyph_recog_text": ":am", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418065.jpg", "caption": "a man in a blue shirt and sunglasses on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155936.jpg", "caption": "a baseball player standing in front of a net", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155941.jpg", "caption": "a baseball player is about to swing at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418087.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549168.jpg", "caption": "a plate with a sandwich and french fries on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287024.jpg", "caption": "a man in a white shirt and black shorts is playing tennis", "annotations": [{"polygon": [[162, 40], [161, 118], [428, 121], [427, 55]], "text": "BULOVA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BU SVA", "recog_valid": false, "glyph_recog_text": "BULOVA", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[86, 148], [85, 176], [168, 175], [167, 145]], "text": "ATCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ATCH", "recog_valid": true, "glyph_recog_text": "ATCH", "glyph_recog_ld": 1.0}, {"polygon": [[170, 147], [176, 177], [246, 172], [247, 148]], "text": "TWIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TIW", "recog_valid": false, "glyph_recog_text": "TWIN", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[344, 147], [353, 178], [383, 175], [382, 148]], "text": "VE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VE", "recog_valid": true, "glyph_recog_text": "VE", "glyph_recog_ld": 1.0}, {"polygon": [[391, 146], [391, 177], [427, 177], [428, 148]], "text": "SP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SPI", "recog_valid": false, "glyph_recog_text": "SP", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[166, 216], [147, 231], [149, 251], [154, 288], [191, 288], [214, 289], [226, 252], [225, 216]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}, {"polygon": [[358, 217], [338, 232], [340, 246], [348, 284], [374, 291], [416, 286], [428, 240], [426, 214]], "text": "17", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "17", "recog_valid": true, "glyph_recog_text": "17", "glyph_recog_ld": 1.0}, {"polygon": [[230, 384], [230, 432], [428, 433], [427, 379]], "text": "mirat", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "mirat", "recog_valid": true, "glyph_recog_text": "mirat", "glyph_recog_ld": 1.0}, {"polygon": [[85, 396], [82, 423], [178, 421], [176, 393]], "text": "OPEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "S OPEN", "recog_valid": false, "glyph_recog_text": "OPEN", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549184.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155981.jpg", "caption": "a train car with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418134.jpg", "caption": "a girl in a white shirt and black shorts is holding a tennis racket", "annotations": [{"polygon": [[161, 213], [154, 241], [232, 249], [235, 229]], "text": "Titans", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IITANS", "recog_valid": false, "glyph_recog_text": "Titans", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418141.jpg", "caption": "a kitchen counter with a fan, a fan, and a fan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000155999.jpg", "caption": "a yellow fire hydrant with eyes painted on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156020.jpg", "caption": "a flatbread pizza with broccoli and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418172.jpg", "caption": "a person sitting on a bench with a bag on their back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418181.jpg", "caption": "a stop sign in front of tall buildings", "annotations": [{"polygon": [[226, 377], [257, 375], [272, 380], [273, 405], [256, 403], [242, 402], [221, 403], [223, 396], [224, 379]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024972.jpg", "caption": "a group photo of people in suits and ties", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000024980.jpg", "caption": "a train is pulling into a station with a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156066.jpg", "caption": "a table with bananas and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287140.jpg", "caption": "a group of motorcycles parked next to a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418215.jpg", "caption": "a sign on a brick wall", "annotations": [{"polygon": [[148, 317], [152, 359], [175, 355], [199, 355], [219, 358], [217, 311], [194, 309], [170, 311]], "text": "HOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOME", "recog_valid": true, "glyph_recog_text": "HOME", "glyph_recog_ld": 1.0}, {"polygon": [[243, 317], [236, 362], [252, 369], [280, 380], [328, 408], [329, 355], [289, 335]], "text": "CAFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAFE", "recog_valid": true, "glyph_recog_text": "CAFE", "glyph_recog_ld": 1.0}, {"polygon": [[192, 399], [190, 426], [294, 468], [294, 434]], "text": "WASHINGTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WASHINGTON", "recog_valid": true, "glyph_recog_text": "WASHINGTON", "glyph_recog_ld": 1.0}, {"polygon": [[198, 94], [200, 124], [253, 113], [249, 93]], "text": "Coke", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Coke", "recog_valid": true, "glyph_recog_text": "Coke", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418229.jpg", "caption": "a man is standing next to a large truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156156.jpg", "caption": "a cat laying on a person's lap next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549373.jpg", "caption": "a red fire hydrant sitting on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156171.jpg", "caption": "a dog looking out the window of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287245.jpg", "caption": "a fire truck driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025105.jpg", "caption": "a group of people walking near a white airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549404.jpg", "caption": "a qantas airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418333.jpg", "caption": "a bento box with food and a plastic container", "annotations": [{"polygon": [[77, 125], [87, 145], [201, 87], [189, 66]], "text": "Parasite", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Parasite", "recog_valid": true, "glyph_recog_text": "Parasite", "glyph_recog_ld": 1.0}, {"polygon": [[106, 183], [124, 215], [238, 156], [216, 121]], "text": "PALS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PALS", "recog_valid": true, "glyph_recog_text": "PALS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549409.jpg", "caption": "a table with a cell phone, pencils, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156213.jpg", "caption": "a sandwich and chips on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287289.jpg", "caption": "a coffee maker and a cup of coffee on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418372.jpg", "caption": "a bag of food including a banana, apple, and a bag of nuts", "annotations": [{"polygon": [[40, 309], [29, 323], [36, 329], [74, 330], [141, 333], [152, 343], [163, 329], [168, 318], [101, 311], [69, 312]], "text": "scrummy!", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "scrummy", "recog_valid": false, "glyph_recog_text": "scrummy!", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418379.jpg", "caption": "a plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549459.jpg", "caption": "a vintage kitchen with a stove, sink, and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025174.jpg", "caption": "a person's hand on a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418394.jpg", "caption": "a kitchen counter with many different types of condiments", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287324.jpg", "caption": "a clock made out of legos on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418397.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287337.jpg", "caption": "a large truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549488.jpg", "caption": "a white dog sitting on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418418.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549496.jpg", "caption": "a large elephant is in a large arena with a crowd watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549501.jpg", "caption": "a boy riding a skateboard in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287372.jpg", "caption": "a baseball player holding a bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418445.jpg", "caption": "a man with a beard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025230.jpg", "caption": "a table with several boxes of donuts and cupcakes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287374.jpg", "caption": "two men playing frisbee in a field", "annotations": [{"polygon": [[81, 186], [86, 222], [126, 216], [150, 210], [146, 176]], "text": "DOGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "00R&", "recog_valid": false, "glyph_recog_text": "DOGS", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287378.jpg", "caption": "hyatt on the bind", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025237.jpg", "caption": "a little girl brushing her teeth with a boy", "annotations": [{"polygon": [[125, 256], [127, 304], [61, 321], [62, 270]], "text": "IE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "日", "recog_valid": false, "glyph_recog_text": "IE", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025274.jpg", "caption": "a crowd of people standing in a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287427.jpg", "caption": "a cake with a man and a dog", "annotations": [{"polygon": [[91, 83], [100, 83], [109, 87], [120, 94], [135, 100], [152, 105], [164, 111], [177, 115], [194, 116], [209, 116], [230, 110], [248, 104], [260, 99], [266, 133], [244, 139], [226, 145], [209, 147], [195, 148], [168, 143], [151, 137], [132, 130], [120, 123], [109, 116], [90, 115]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BIRTHDAY", "recog_valid": true, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.0}, {"polygon": [[276, 99], [300, 91], [319, 90], [344, 90], [367, 93], [391, 105], [409, 116], [420, 127], [408, 152], [392, 141], [378, 129], [359, 123], [330, 121], [311, 124], [295, 129], [279, 135]], "text": "EDWARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "EDWARD", "recog_valid": true, "glyph_recog_text": "EDWARD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025283.jpg", "caption": "a stop sign on a pole", "annotations": [{"polygon": [[311, 93], [311, 124], [345, 125], [371, 125], [402, 125], [402, 95], [370, 94]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549581.jpg", "caption": "a group of people in a dragon boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025325.jpg", "caption": "a table with a book, a glass of water, and a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549644.jpg", "caption": "a bathroom with a toilet and sink in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287519.jpg", "caption": "a man wearing a vest", "annotations": [{"polygon": [[251, 348], [269, 345], [286, 338], [294, 332], [303, 325], [306, 341], [297, 348], [287, 353], [280, 357], [263, 362], [252, 366]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLICE", "recog_valid": true, "glyph_recog_text": "POLICE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418600.jpg", "caption": "a woman talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156459.jpg", "caption": "a hot dog on a bun", "annotations": [{"polygon": [[266, 199], [251, 179], [259, 168], [336, 159], [333, 174], [308, 198], [289, 204]], "text": "Pepper", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PBO", "recog_valid": false, "glyph_recog_text": "Peppe", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418609.jpg", "caption": "a colorful bus with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156471.jpg", "caption": "a man in a black shirt is looking at his cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549709.jpg", "caption": "a cat sitting on a mouse pad", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287568.jpg", "caption": "a girl in purple dress holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156499.jpg", "caption": "a frisbee is stuck in a tree", "annotations": [{"polygon": [[370, 303], [372, 313], [339, 336], [336, 324]], "text": "JACKSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rksov", "recog_valid": false, "glyph_recog_text": "AOXBCN", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549721.jpg", "caption": "a baseball player is swinging at a ball", "annotations": [{"polygon": [[116, 88], [178, 94], [178, 132], [113, 126]], "text": "FOX", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FOX", "recog_valid": true, "glyph_recog_text": "FOX", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156510.jpg", "caption": "a man riding a skateboard down a road", "annotations": [{"polygon": [[113, 198], [113, 198], [132, 198], [136, 200], [116, 239], [96, 235]], "text": "51", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "5 1", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156512.jpg", "caption": "a man on a snowboard doing a trick in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287585.jpg", "caption": "a plastic container with a salad, fruit and a meat dish", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418675.jpg", "caption": "a boat is parked in front of a house and chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156534.jpg", "caption": "a man and woman standing next to a giraffe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549784.jpg", "caption": "a green motorcycle with a face on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418723.jpg", "caption": "a man and a woman eating a sandwich on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156594.jpg", "caption": "people are playing a video game on a screen", "annotations": [{"polygon": [[56, 203], [189, 210], [191, 242], [58, 241], [57, 203]], "text": "CHANGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Cltr nge", "recog_valid": false, "glyph_recog_text": "CHANGE", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[199, 206], [249, 206], [248, 237], [200, 237], [200, 208]], "text": "Mii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mii", "recog_valid": true, "glyph_recog_text": "Mii", "glyph_recog_ld": 1.0}, {"polygon": [[472, 239], [473, 203], [512, 203], [512, 237], [472, 239]], "text": "O", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OI", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025528.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[428, 381], [428, 381], [468, 384], [468, 384], [469, 356], [469, 356], [430, 352]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156608.jpg", "caption": "two men sitting on a couch with a table full of alcohol", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156624.jpg", "caption": "a statue of liberty with birds flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287716.jpg", "caption": "a sandwich and a cup of coffee on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287735.jpg", "caption": "a woman holding a cat in front of a mirror", "annotations": [{"polygon": [[416, 61], [413, 73], [407, 85], [417, 87], [432, 93], [456, 89], [452, 77], [447, 71], [432, 60]], "text": "sana", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Swn", "recog_valid": false, "glyph_recog_text": "sana", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[468, 58], [456, 65], [454, 73], [457, 85], [472, 91], [487, 91], [505, 91], [510, 90], [509, 71], [504, 57], [492, 51], [474, 54]], "text": "Slama", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Slieot", "recog_valid": false, "glyph_recog_text": "Slama", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418810.jpg", "caption": "a boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418812.jpg", "caption": "a table with a lot of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549898.jpg", "caption": "a motorcycle rider and a police officer riding on the freeway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549910.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549915.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025644.jpg", "caption": "a train is on the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287792.jpg", "caption": "a man in a suit and tie is standing next to a pyramid", "annotations": [{"polygon": [[155, 102], [167, 110], [176, 102], [194, 88], [215, 76], [248, 67], [269, 66], [295, 68], [298, 69], [301, 54], [300, 49], [280, 50], [258, 50], [237, 51], [221, 55], [201, 63], [188, 71], [178, 80], [156, 102]], "text": "PYRAMID", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PYRAMID", "recog_valid": true, "glyph_recog_text": "PYRAMID", "glyph_recog_ld": 1.0}, {"polygon": [[333, 82], [343, 90], [355, 98], [371, 116], [380, 130], [394, 157], [399, 191], [416, 188], [410, 158], [402, 134], [394, 118], [385, 106], [378, 97], [363, 85], [343, 70], [340, 70], [333, 83]], "text": "SCHEME", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SCHEML", "recog_valid": false, "glyph_recog_text": "SCHEME", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025649.jpg", "caption": "a chili dog on a napkin", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287794.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025675.jpg", "caption": "a baseball player swinging a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287822.jpg", "caption": "a building with a sign on it has texts", "annotations": [{"polygon": [[155, 305], [259, 305], [258, 337], [155, 338]], "text": "Coca Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cscl", "recog_valid": false, "glyph_recog_text": "Coca Cola", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418894.jpg", "caption": "two people laying on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000549972.jpg", "caption": "coffee mug with pen and laptop on wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287831.jpg", "caption": "a baseball player in the middle of throwing a pitch", "annotations": [{"polygon": [[57, 242], [57, 243], [111, 244], [112, 208], [58, 208]], "text": "50", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "50", "recog_valid": true, "glyph_recog_text": "50", "glyph_recog_ld": 1.0}, {"polygon": [[-2, 206], [41, 232], [32, 250], [0, 234], [0, 234]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "we", "recog_valid": false, "glyph_recog_text": "AVE", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287833.jpg", "caption": "a woman is getting off a train at a station", "annotations": [{"polygon": [[384, 151], [383, 189], [408, 190], [412, 175], [414, 147], [396, 150]], "text": "SJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "00-", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287849.jpg", "caption": "a wii music machine", "annotations": [{"polygon": [[285, 93], [286, 111], [360, 101], [359, 81]], "text": "Showing", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Showing", "recog_valid": true, "glyph_recog_text": "Showing", "glyph_recog_ld": 1.0}, {"polygon": [[228, 130], [279, 122], [279, 158], [228, 163]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wii", "recog_valid": true, "glyph_recog_text": "Wii", "glyph_recog_ld": 1.0}, {"polygon": [[283, 156], [283, 122], [384, 108], [386, 145]], "text": "iMUSIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Music", "recog_valid": false, "glyph_recog_text": "IMUSIC", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418927.jpg", "caption": "a young boy in a blue and yellow apron", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287870.jpg", "caption": "a woman sitting in a cell holding a tray of food", "annotations": [{"polygon": [[454, 339], [455, 346], [464, 343], [471, 343], [481, 347], [487, 353], [491, 363], [494, 377], [500, 378], [500, 365], [495, 353], [490, 347], [485, 341], [475, 336], [466, 336], [455, 339]], "text": "MIDLANDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DL4NO", "recog_valid": false, "glyph_recog_text": "MIOLANDS", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287884.jpg", "caption": "a man is doing a trick on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156823.jpg", "caption": "three sheep standing in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287904.jpg", "caption": "two men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550049.jpg", "caption": "a tennis match with spectators watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000418982.jpg", "caption": "a large jet airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156849.jpg", "caption": "a laptop computer sitting on a table with a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025777.jpg", "caption": "two girls playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287927.jpg", "caption": "a toilet with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287934.jpg", "caption": "a living room with two couches, a television and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025797.jpg", "caption": "a stop sign with stickers on it", "annotations": [{"polygon": [[169, 191], [319, 190], [356, 187], [370, 190], [382, 210], [386, 235], [385, 257], [378, 274], [364, 280], [136, 281]], "text": "ALTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALTO", "recog_valid": true, "glyph_recog_text": "ALTO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025799.jpg", "caption": "ashton guitars at the show", "annotations": [{"polygon": [[215, 361], [162, 306], [180, 90], [234, 118]], "text": "Aston", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "08145", "recog_valid": false, "glyph_recog_text": "cn oc", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[334, 163], [333, 124], [420, 125], [420, 154]], "text": "Ashton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ashto", "recog_valid": false, "glyph_recog_text": "Ashton", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[321, 488], [314, 512], [324, 513], [357, 491], [359, 471]], "text": "Ashton", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Ashion", "recog_valid": false, "glyph_recog_text": "Ashton", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000287953.jpg", "caption": "a desk with a laptop and a computer on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156895.jpg", "caption": "a small plane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550113.jpg", "caption": "a boat is docked on the beach with a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156904.jpg", "caption": "a car parked on a runway with a plane in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025833.jpg", "caption": "a man standing next to a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419056.jpg", "caption": "a range rover parked under a bridge with a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550147.jpg", "caption": "a man and a child on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288021.jpg", "caption": "a bus driving down a street with a man standing in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288025.jpg", "caption": "a stop sign is on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000156963.jpg", "caption": "a red double decker bus driving down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288039.jpg", "caption": "a young girl holding a baseball bat on a field", "annotations": [{"polygon": [[185, 437], [192, 425], [218, 389], [281, 379], [305, 376], [306, 417], [293, 436], [257, 439], [194, 438], [185, 438]], "text": "Mets", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Mou", "recog_valid": false, "glyph_recog_text": "Mets", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288054.jpg", "caption": "a bus with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000025917.jpg", "caption": "a steam engine train is pulling into the station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157006.jpg", "caption": "a man holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157020.jpg", "caption": "a police officer riding a bike next to a woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157026.jpg", "caption": "a man and woman standing in front of a food stand", "annotations": [{"polygon": [[402, 124], [458, 142], [458, 156], [396, 136], [402, 125]], "text": "ANCHOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "NCHOR", "recog_valid": false, "glyph_recog_text": "ANCHOR", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288106.jpg", "caption": "a cat sitting on top of a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157037.jpg", "caption": "a mall with many people walking around and shopping", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419186.jpg", "caption": "a woman in a red and white striped shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419193.jpg", "caption": "a man in a suit and tie talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288131.jpg", "caption": "a street sign with the name taylor and mulberry st", "annotations": [{"polygon": [[160, 161], [159, 169], [162, 224], [168, 228], [200, 241], [212, 248], [237, 258], [251, 265], [266, 272], [276, 275], [276, 225], [256, 212]], "text": "TAYLOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAYLOR", "recog_valid": true, "glyph_recog_text": "TAYLOR", "glyph_recog_ld": 1.0}, {"polygon": [[146, 310], [144, 365], [358, 343], [371, 281], [287, 290], [245, 294]], "text": "MULBERRY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MULBERRY", "recog_valid": true, "glyph_recog_text": "MULBERRY", "glyph_recog_ld": 1.0}, {"polygon": [[395, 306], [390, 336], [399, 343], [409, 339], [427, 340], [433, 308]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[100, 314], [98, 329], [97, 339], [105, 340], [107, 335], [113, 338], [120, 336], [130, 336], [134, 330], [135, 308]], "text": "500", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "500", "recog_valid": true, "glyph_recog_text": "500", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026002.jpg", "caption": "a stop sign with a street sign on it", "annotations": [{"polygon": [[279, 173], [373, 159], [369, 175], [370, 188], [283, 200]], "text": "MURPHY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MURPHY", "recog_valid": true, "glyph_recog_text": "MURPHY", "glyph_recog_ld": 1.0}, {"polygon": [[299, 252], [404, 238], [409, 260], [392, 267], [394, 287], [303, 298], [295, 287], [295, 260]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026007.jpg", "caption": "a steam engine train is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288157.jpg", "caption": "a dog laying on the floor with a water bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419232.jpg", "caption": "a group of children eating pizza at a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026028.jpg", "caption": "a group of clocks on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026031.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[100, 131], [91, 103], [2, 96], [2, 125]], "text": "DINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "DINA", "recog_valid": true, "glyph_recog_text": "DINA", "glyph_recog_ld": 1.0}, {"polygon": [[416, 165], [416, 123], [510, 125], [512, 171]], "text": "ach", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ach", "recog_valid": true, "glyph_recog_text": "ach", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550333.jpg", "caption": "a living room with a couch, table and chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288194.jpg", "caption": "a person sitting on a brick sidewalk with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026052.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550353.jpg", "caption": "a computer monitor, keyboard, mouse, and a box on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550354.jpg", "caption": "two people standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157160.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026097.jpg", "caption": "a snow covered street at night with cars parked on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550392.jpg", "caption": "a woman eating a doughnut with sprinkles", "annotations": [{"polygon": [[294, 412], [293, 419], [370, 447], [377, 447], [377, 444]], "text": "OOUGHNUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "boUoUNVI!", "recog_valid": false, "glyph_recog_text": "款y家生n务klx", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[346, 408], [336, 425], [380, 438], [389, 421]], "text": "hreme", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "hheme", "recog_valid": false, "glyph_recog_text": "hreme", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[304, 392], [296, 411], [334, 425], [345, 408]], "text": "KRISPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "huspy", "recog_valid": false, "glyph_recog_text": "KRISPY", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026109.jpg", "caption": "a clock and a copper cylinder on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026118.jpg", "caption": "a man holding a tennis racket in front of a crowd", "annotations": [{"polygon": [[20, 242], [99, 259], [92, 274], [17, 257]], "text": "REVIEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "REVIEW", "recog_valid": true, "glyph_recog_text": "REVIEW", "glyph_recog_ld": 1.0}, {"polygon": [[0, 261], [67, 274], [63, 292], [0, 279]], "text": "ENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ENNIS", "recog_valid": true, "glyph_recog_text": "ENNIS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157190.jpg", "caption": "a man is blending a drink in a blender", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288262.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[216, 152], [216, 192], [313, 189], [311, 149]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288275.jpg", "caption": "a microwave with a bowl inside of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026132.jpg", "caption": "a woman is standing next to a lawn mower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026133.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288278.jpg", "caption": "a model train set with red caboose and other trains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550421.jpg", "caption": "a man pushing a cart full of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026147.jpg", "caption": "two people sitting on a bench overlooking the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550449.jpg", "caption": "a woman holding a pizza and a sunflower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026162.jpg", "caption": "a stop sign is on a pole near a boat", "annotations": [{"polygon": [[215, 227], [351, 226], [352, 305], [219, 308]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550453.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157249.jpg", "caption": "a variety of fruits and vegetables are displayed in crates", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550478.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[411, 420], [429, 456], [463, 442], [462, 403]], "text": "L", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550482.jpg", "caption": "a white and red airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026208.jpg", "caption": "a pizza on a cutting board with a bottle of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419439.jpg", "caption": "a school bus parked in the snow with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419442.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288379.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026241.jpg", "caption": "a man doing a skateboard trick on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550532.jpg", "caption": "two parking meters are on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550540.jpg", "caption": "a man with a dog catching a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550543.jpg", "caption": "a stop sign and a broken down traffic sign on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026263.jpg", "caption": "a banana with writing on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026266.jpg", "caption": "a green street sign with white letters", "annotations": [{"polygon": [[232, 236], [291, 205], [293, 244], [233, 273]], "text": "MAIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAIN", "recog_valid": true, "glyph_recog_text": "MAIN", "glyph_recog_ld": 1.0}, {"polygon": [[126, 257], [343, 305], [346, 340], [125, 296]], "text": "WINCHESTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINCHESTER", "recog_valid": true, "glyph_recog_text": "WINCHESTER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419489.jpg", "caption": "a man standing in front of a pizza box", "annotations": [{"polygon": [[136, 245], [83, 309], [105, 328], [152, 262]], "text": "CALIF", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CALIF", "recog_valid": true, "glyph_recog_text": "CALIF", "glyph_recog_ld": 1.0}, {"polygon": [[178, 263], [132, 329], [115, 315], [161, 251]], "text": "ORNIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORNIA", "recog_valid": true, "glyph_recog_text": "ORNIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157345.jpg", "caption": "a display case filled with various pastries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157356.jpg", "caption": "a young girl playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419501.jpg", "caption": "a desk with a laptop and a computer on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157393.jpg", "caption": "two men riding a motorcycle on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288471.jpg", "caption": "a small refrigerator and a small table next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288472.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[166, 88], [165, 125], [250, 121], [250, 83]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[163, 133], [162, 169], [248, 167], [250, 130]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[288, 34], [289, 70], [448, 63], [448, 28]], "text": "SEELEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SEELEY", "recog_valid": true, "glyph_recog_text": "SEELEY", "glyph_recog_ld": 1.0}, {"polygon": [[121, 317], [118, 392], [286, 394], [299, 316]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550649.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550655.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288513.jpg", "caption": "a black and white photo of a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419586.jpg", "caption": "a table with a bunch of pizzas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419587.jpg", "caption": "a train traveling down the tracks with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026388.jpg", "caption": "a man in glasses and a suit is speaking", "annotations": [{"polygon": [[0, 160], [85, 161], [86, 213], [1, 211]], "text": "ENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IENT", "recog_valid": false, "glyph_recog_text": "ENT", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157462.jpg", "caption": "a train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288547.jpg", "caption": "a sticker on the back of a car has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419632.jpg", "caption": "two men standing next to a red light", "annotations": [{"polygon": [[295, 374], [293, 402], [382, 406], [382, 406], [382, 381]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HERE", "recog_valid": true, "glyph_recog_text": "HERE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288569.jpg", "caption": "a train at a station with a sign has texts", "annotations": [{"polygon": [[172, 310], [181, 324], [179, 336], [161, 347], [137, 338], [139, 325]], "text": "RENFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "renfe", "recog_valid": false, "glyph_recog_text": "RENFE", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419650.jpg", "caption": "a boy is swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419666.jpg", "caption": "a table with four containers of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419679.jpg", "caption": "a train traveling down the tracks near a mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550766.jpg", "caption": "a man and woman riding on a bike in a barn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419711.jpg", "caption": "a black and white photo of a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550783.jpg", "caption": "a street sign and a bike path next to a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419714.jpg", "caption": "a man on a horse", "annotations": [{"polygon": [[9, 27], [53, 26], [60, 57], [12, 47]], "text": "WA", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "WA", "recog_valid": true, "glyph_recog_text": "WA", "glyph_recog_ld": 1.0}, {"polygon": [[69, 27], [115, 39], [113, 70], [68, 59]], "text": "DO", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "DO", "recog_valid": true, "glyph_recog_text": "DO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288651.jpg", "caption": "a group of people on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288659.jpg", "caption": "a young boy riding a skateboard in a skate park", "annotations": [{"polygon": [[78, 426], [77, 398], [143, 401], [141, 431]], "text": "Anyone", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Anyone", "recog_valid": true, "glyph_recog_text": "Anyone", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026521.jpg", "caption": "two horses are standing in a pen together", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026537.jpg", "caption": "a man in a red suit skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419759.jpg", "caption": "a large white and blue airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157618.jpg", "caption": "a large body of water with many boats in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419764.jpg", "caption": "a man doing a skateboard trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288694.jpg", "caption": "a young boy stands in front of a wall with graffiti", "annotations": [{"polygon": [[232, 208], [232, 163], [303, 152], [306, 191]], "text": "EMO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EIC", "recog_valid": false, "glyph_recog_text": "EMO", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157639.jpg", "caption": "a table with a lot of food and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419785.jpg", "caption": "a baseball player is getting ready to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157642.jpg", "caption": "a large truck with a large logo on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026576.jpg", "caption": "a man jumping to hit a tennis ball", "annotations": [{"polygon": [[27, 81], [122, 84], [120, 205], [27, 200]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[0, 272], [96, 273], [98, 308], [1, 305]], "text": "HIPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HIPS", "recog_valid": true, "glyph_recog_text": "HIPS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026577.jpg", "caption": "a group of people playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550874.jpg", "caption": "a bus with a campaign sign on it", "annotations": [{"polygon": [[398, 215], [399, 249], [434, 252], [433, 227]], "text": "ROMNEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OMNE", "recog_valid": false, "glyph_recog_text": "Roue,", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[268, 205], [268, 230], [305, 235], [303, 215]], "text": "MORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MORE", "recog_valid": true, "glyph_recog_text": "MORE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288739.jpg", "caption": "a parade of people riding on a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419815.jpg", "caption": "a blue and white bus parked next to a blue and white bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026602.jpg", "caption": "a woman holding a bat", "annotations": [{"polygon": [[93, 202], [105, 185], [184, 200], [171, 216]], "text": "CYCLONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ECEUS!", "recog_valid": false, "glyph_recog_text": "CYCLONE", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[303, 247], [332, 248], [347, 247], [362, 232], [372, 220], [387, 219], [383, 237], [375, 252], [368, 266], [360, 279], [347, 288], [334, 296], [311, 306]], "text": "Biscu", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Barp", "recog_valid": false, "glyph_recog_text": "Biscu", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157675.jpg", "caption": "a man standing next to an old train engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419822.jpg", "caption": "a train on the tracks with a light on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026625.jpg", "caption": "a fruit stand with many different types of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550934.jpg", "caption": "the dark knight joker figure is shown", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419879.jpg", "caption": "a traffic light and a street sign on a pole", "annotations": [{"polygon": [[123, 158], [169, 130], [191, 116], [217, 96], [228, 89], [222, 113], [185, 138], [141, 166], [119, 179]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROADWAY", "recog_valid": true, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 1.0}, {"polygon": [[234, 82], [262, 60], [263, 71], [233, 93]], "text": "BLVD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BLVD", "recog_valid": true, "glyph_recog_text": "RLY品", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550955.jpg", "caption": "a tennis player is swinging his racket at a ball", "annotations": [{"polygon": [[6, 236], [6, 302], [127, 309], [141, 285], [138, 251], [124, 228]], "text": "SE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SEO", "recog_valid": false, "glyph_recog_text": "SE", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[393, 238], [396, 273], [478, 273], [474, 235]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Emirates", "recog_valid": true, "glyph_recog_text": "Emirates", "glyph_recog_ld": 1.0}, {"polygon": [[482, 236], [480, 273], [513, 273], [513, 237]], "text": "Airl", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Air", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157744.jpg", "caption": "a microwave and a refrigerator sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000550993.jpg", "caption": "a bookcase with books and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551015.jpg", "caption": "people walking on the beach near a boat", "annotations": [{"polygon": [[196, 387], [212, 392], [227, 360], [224, 356], [208, 360], [196, 384]], "text": "045", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "04-", "recog_valid": false, "glyph_recog_text": "045", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026733.jpg", "caption": "a kitten on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157811.jpg", "caption": "a woman standing in a kitchen with boxes and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288889.jpg", "caption": "a group of cyclists racing down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026747.jpg", "caption": "a man walking an elephant down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288906.jpg", "caption": "a woman sitting at a table with children", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000419996.jpg", "caption": "a jar with teddy bears and a candle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026784.jpg", "caption": "a man in a wheelchair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157872.jpg", "caption": "a group of people cutting a ribbon at a ceremony", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420021.jpg", "caption": "a large air canada airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026829.jpg", "caption": "a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420054.jpg", "caption": "a baseball player is standing at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000288982.jpg", "caption": "a cat is holding a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157920.jpg", "caption": "a man standing in a field with a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420065.jpg", "caption": "a man and a woman eating food in a public place", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157923.jpg", "caption": "a person walking on the street next to an orange trolley", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420066.jpg", "caption": "a fire hydrant sitting on a concrete slab", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289005.jpg", "caption": "a row of motorcycles parked in a row", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157933.jpg", "caption": "people looking at a display of cell phones in a tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289031.jpg", "caption": "a bowl of broccoli and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551185.jpg", "caption": "a traffic light with a sign that says california", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026900.jpg", "caption": "a double decker bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000157989.jpg", "caption": "a boy smiles as he eats pizza at a picnic table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420135.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551214.jpg", "caption": "a large passenger jet flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289071.jpg", "caption": "a street sign pointing to a street and a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289075.jpg", "caption": "a man with dreadlocks standing in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420151.jpg", "caption": "a woman walking down a train platform with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026944.jpg", "caption": "a tall clock tower with a clock on it", "annotations": [{"polygon": [[326, 360], [352, 326], [461, 362], [443, 399]], "text": "MO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "MO", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420167.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551244.jpg", "caption": "a pizza with onions and ham on a table", "annotations": [{"polygon": [[344, 152], [334, 169], [341, 175], [367, 179], [383, 171], [390, 159], [387, 149], [380, 159], [366, 160], [356, 156]], "text": "Sirter ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "Sirter", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158036.jpg", "caption": "a red double decker bus driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158038.jpg", "caption": "a maroon truck parked in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420200.jpg", "caption": "a clock on a blue wall", "annotations": [{"polygon": [[118, 203], [118, 203], [161, 204], [196, 215], [244, 214], [347, 215], [374, 226], [373, 231], [289, 249], [137, 232], [118, 208]], "text": "norge", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "norqe", "recog_valid": false, "glyph_recog_text": "n o r g e", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158058.jpg", "caption": "a man sitting on a couch", "annotations": [{"polygon": [[7, 5], [12, 65], [91, 64], [90, 3]], "text": "666", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "666", "recog_valid": true, "glyph_recog_text": "666", "glyph_recog_ld": 1.0}, {"polygon": [[20, 153], [20, 201], [115, 207], [113, 155]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "dCIS", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000026995.jpg", "caption": "a woman holding a teddy bear in a restaurant", "annotations": [{"polygon": [[57, 122], [55, 119], [57, 115], [56, 112], [54, 105], [53, 102], [67, 102], [69, 105], [81, 108], [84, 107], [89, 106], [91, 109], [96, 106], [101, 108], [101, 118], [108, 119], [110, 125], [119, 125], [116, 131], [118, 136], [121, 138], [120, 142], [120, 146], [107, 146], [105, 138], [82, 140], [67, 140], [67, 145], [63, 144], [52, 146], [48, 143], [48, 129], [52, 126], [55, 125]], "text": "BEN SPARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "bahk", "recog_valid": false, "glyph_recog_text": "BEN SPARK", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027008.jpg", "caption": "a motorcycle parked in front of a white car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027005.jpg", "caption": "a vintage truck with a barrel in the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551305.jpg", "caption": "a man is grooming a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027015.jpg", "caption": "two horses pulling a float down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420236.jpg", "caption": "a van with a kite flying in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158099.jpg", "caption": "a train with graffiti on it is parked at a station", "annotations": [{"polygon": [[189, 366], [223, 236], [335, 210], [332, 311]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "XV", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551341.jpg", "caption": "a tow truck with a flatbed trailer parked on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420290.jpg", "caption": "a stop sign and a sign has texts", "annotations": [{"polygon": [[290, 237], [290, 237], [334, 240], [343, 211], [343, 210], [293, 206], [290, 237]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551372.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[294, 220], [294, 220], [301, 199], [310, 195], [319, 198], [328, 204], [341, 216], [334, 223], [319, 235], [314, 226]], "text": "27", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "27", "recog_valid": true, "glyph_recog_text": "27", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289231.jpg", "caption": "a bus driving down a street with a mural on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158169.jpg", "caption": "a man eating cereal in a kitchen", "annotations": [{"polygon": [[306, 305], [302, 327], [327, 334], [349, 337], [352, 342], [365, 343], [367, 339], [370, 320], [356, 312]], "text": "Lucky", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lucky", "recog_valid": true, "glyph_recog_text": "Lucky", "glyph_recog_ld": 1.0}, {"polygon": [[292, 324], [288, 349], [371, 363], [373, 358], [374, 346]], "text": "Charms", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Charms", "recog_valid": true, "glyph_recog_text": "Charms", "glyph_recog_ld": 1.0}, {"polygon": [[277, 257], [286, 273], [355, 245], [349, 230]], "text": "LIFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OALIFE", "recog_valid": false, "glyph_recog_text": "LIFE", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027108.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[206, 299], [206, 299], [357, 214], [353, 190], [209, 277]], "text": "MASONIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MASONIC", "recog_valid": true, "glyph_recog_text": "MASONIC", "glyph_recog_ld": 1.0}, {"polygon": [[274, 225], [306, 206], [304, 194], [271, 214]], "text": "900", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "900", "recog_valid": true, "glyph_recog_text": "PQc", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289255.jpg", "caption": "a person pouring wine into a glass", "annotations": [{"polygon": [[248, 362], [222, 386], [225, 389], [235, 384], [249, 391], [260, 400], [286, 400], [292, 393]], "text": "plai", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "plai", "recog_valid": true, "glyph_recog_text": "plai", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158195.jpg", "caption": "a man in a suit and tie looking at a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158196.jpg", "caption": "a traffic light is shown on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158198.jpg", "caption": "a white lighthouse with a yellow top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551418.jpg", "caption": "a group of men standing behind a fence holding signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027135.jpg", "caption": "a man in a red hat is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551433.jpg", "caption": "a glider plane flying over a field with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420363.jpg", "caption": "a table with a laptop and flowers on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420366.jpg", "caption": "a group of remote controls sitting on a table", "annotations": [{"polygon": [[189, 367], [189, 394], [267, 392], [267, 363]], "text": "COX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COX", "recog_valid": true, "glyph_recog_text": "cox", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551441.jpg", "caption": "a stop sign and a stop sign on a pole", "annotations": [{"polygon": [[261, 174], [259, 201], [299, 210], [301, 185]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[220, 191], [219, 216], [254, 222], [255, 199]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420372.jpg", "caption": "a yellow truck parked in front of a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027163.jpg", "caption": "a large building with a clock tower and a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027175.jpg", "caption": "a toy fire truck is parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420396.jpg", "caption": "a man in a hat is placing chocolate on trays", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551470.jpg", "caption": "a monorail train is traveling down a track", "annotations": [{"polygon": [[48, 267], [48, 287], [180, 270], [179, 249]], "text": "SYBNEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SYD NE Y", "recog_valid": false, "glyph_recog_text": "SYBNEY", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551472.jpg", "caption": "a group of motorcycles parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420403.jpg", "caption": "a large white and red airplane parked at the airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420410.jpg", "caption": "a dog standing next to a yellow fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289352.jpg", "caption": "a man walking in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158293.jpg", "caption": "a burger and a beer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289367.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420442.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420446.jpg", "caption": "a stop sign has texts written on it", "annotations": [{"polygon": [[221, 145], [353, 160], [331, 226], [208, 211]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551522.jpg", "caption": "two women are working on pottery in a workshop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158322.jpg", "caption": "a man sitting in a chair", "annotations": [{"polygon": [[329, 239], [320, 248], [330, 249], [341, 253], [352, 261], [360, 274], [361, 283], [364, 283], [377, 276], [373, 265], [369, 258], [361, 250], [352, 245], [348, 241], [329, 239]], "text": "HATEAU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HATEAU", "recog_valid": true, "glyph_recog_text": "HATEAU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158326.jpg", "caption": "three polar bears on the shore with a boat in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420475.jpg", "caption": "a man on a motorcycle doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420488.jpg", "caption": "a bus driving down a street with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027276.jpg", "caption": "a dog laying on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420496.jpg", "caption": "a city street at night with traffic lights and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420500.jpg", "caption": "an old photo of a biplane parked on the ground", "annotations": [{"polygon": [[103, 349], [110, 370], [133, 382], [140, 381], [159, 363], [124, 350]], "text": "new", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "78", "recog_valid": false, "glyph_recog_text": "new", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[317, 376], [315, 401], [303, 410], [306, 413], [309, 411], [318, 402], [347, 408], [389, 397], [391, 390], [334, 376]], "text": "Jeyas", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Juas", "recog_valid": false, "glyph_recog_text": "Jeyas", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289428.jpg", "caption": "a living room with red furniture and black walls", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158362.jpg", "caption": "a man sitting on a bench using a laptop computer", "annotations": [{"polygon": [[1, 92], [1, 107], [6, 121], [34, 118], [52, 116], [44, 97], [38, 99], [15, 89]], "text": "fm", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "", "recog_valid": false, "glyph_recog_text": "f m", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[126, 96], [131, 120], [147, 121], [159, 120], [167, 112], [144, 89]], "text": "fm", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "C心", "recog_valid": false, "glyph_recog_text": "心", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027298.jpg", "caption": "a bathroom with a sink, toilet and mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551590.jpg", "caption": "an old black and white photo of a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027302.jpg", "caption": "a man and a child on skis on a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420523.jpg", "caption": "a banana and a strawberry on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420530.jpg", "caption": "a truck full of garbage and a horse", "annotations": [{"polygon": [[128, 419], [122, 437], [132, 443], [133, 443], [138, 436], [140, 435], [145, 437], [153, 437], [160, 437], [163, 436], [170, 437], [177, 437], [179, 437], [173, 447], [176, 449], [180, 446], [184, 439], [186, 434], [192, 426], [190, 425], [184, 433], [182, 430], [178, 428], [175, 427], [173, 423], [165, 422], [163, 421], [157, 429], [152, 428], [148, 427], [145, 426], [139, 426], [135, 426], [134, 420]], "text": "Ryan Way", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Roanlieoyf", "recog_valid": false, "glyph_recog_text": "Ryan Way", "glyph_recog_ld": 0.40000059999939996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027352.jpg", "caption": "two giraffes walking in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551651.jpg", "caption": "a young man in an orange and black jacket carrying a snowboard", "annotations": [{"polygon": [[226, 174], [255, 179], [239, 207], [222, 205], [210, 195]], "text": "47", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A7", "recog_valid": false, "glyph_recog_text": "守", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027365.jpg", "caption": "two women playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289515.jpg", "caption": "a sign for donuts on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158451.jpg", "caption": "a baseball game is in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551679.jpg", "caption": "a red and white bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027412.jpg", "caption": "a group of men standing outside with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420629.jpg", "caption": "a black and white photo of a parked motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158486.jpg", "caption": "a large military plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027424.jpg", "caption": "a man and woman standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289591.jpg", "caption": "a fire truck driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551735.jpg", "caption": "a group of young boys sitting on a bench", "annotations": [{"polygon": [[43, 306], [38, 361], [71, 359], [72, 307]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "C)", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[231, 280], [231, 336], [279, 336], [276, 279]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "11", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551757.jpg", "caption": "a woman holding a card with a picture of herself on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420692.jpg", "caption": "a young boy riding a snowboard down a snowy hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027482.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027486.jpg", "caption": "two girls playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027495.jpg", "caption": "a horse with its mouth open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027504.jpg", "caption": "a green and white train traveling through a lush green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027511.jpg", "caption": "a yellow and blue bus driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551829.jpg", "caption": "a clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289693.jpg", "caption": "a man riding a skateboard down a course of cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551840.jpg", "caption": "a skateboarder is standing on a wooden board", "annotations": [{"polygon": [[269, 367], [292, 359], [310, 346], [311, 370], [296, 382], [267, 402]], "text": "BAKER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "花", "recog_valid": false, "glyph_recog_text": "BAKER", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551842.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289738.jpg", "caption": "a train is pulling into a station with a man standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420823.jpg", "caption": "a man walking with a suitcase and a man walking with a bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420831.jpg", "caption": "a clock on a shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289767.jpg", "caption": "a white police car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027622.jpg", "caption": "a man cutting a cake with a group of children", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027627.jpg", "caption": "a hand holding a piece of doughnut in front of a bus", "annotations": [{"polygon": [[82, 272], [82, 272], [109, 278], [119, 287], [123, 280], [162, 284], [160, 243], [80, 237]], "text": "Doughnut", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Doughnut", "recog_valid": true, "glyph_recog_text": "Doughnu", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[174, 285], [280, 300], [278, 258], [235, 254], [231, 244], [171, 242]], "text": "KITCHEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Kitchen", "recog_valid": false, "glyph_recog_text": "KITCHEN", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[75, 270], [25, 262], [25, 232], [30, 232], [32, 241], [46, 240], [48, 234], [53, 234], [53, 240], [74, 243]], "text": "American", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "baican", "recog_valid": false, "glyph_recog_text": "American", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[334, 136], [344, 163], [402, 173], [397, 147]], "text": "DOUGNUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "au.", "recog_valid": false, "glyph_recog_text": "DOUGNUTS", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551922.jpg", "caption": "a woman with a red suitcase walking in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289795.jpg", "caption": "a group of people on top of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551941.jpg", "caption": "a large truck is parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158729.jpg", "caption": "a black car is parked at a stop light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420887.jpg", "caption": "a train traveling down the tracks in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027675.jpg", "caption": "a bus is parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420892.jpg", "caption": "a baseball player holding a bat in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420900.jpg", "caption": "a man is playing tennis on a court", "annotations": [{"polygon": [[275, 23], [400, 175], [426, 157], [427, 145], [415, 127], [398, 86], [392, 80], [371, 54], [358, 38], [322, 15], [307, 0], [290, 1], [284, 7]], "text": "adidas", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "adidas", "recog_valid": true, "glyph_recog_text": "adidas", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158759.jpg", "caption": "a statue of a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551983.jpg", "caption": "a red and white double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027706.jpg", "caption": "a cat sitting in front of a macbook pro", "annotations": [{"polygon": [[30, 302], [30, 302], [193, 283], [190, 251], [28, 264]], "text": "macbook", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MacBook", "recog_valid": false, "glyph_recog_text": "macbook", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[198, 253], [198, 253], [241, 248], [238, 277], [201, 282]], "text": "Pro", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pro", "recog_valid": true, "glyph_recog_text": "Pro", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000551994.jpg", "caption": "a black and white cat laying in a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420922.jpg", "caption": "a man holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027711.jpg", "caption": "a blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552004.jpg", "caption": "a cake with a thank you coach on it", "annotations": [{"polygon": [[107, 251], [100, 309], [131, 310], [138, 252]], "text": "123", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "123", "recog_valid": true, "glyph_recog_text": "-N", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[72, 354], [69, 379], [178, 390], [177, 351]], "text": "Thank ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Thank", "recog_valid": false, "glyph_recog_text": "Thank", "glyph_recog_ld": 1.0}, {"polygon": [[202, 352], [205, 389], [265, 387], [263, 352]], "text": "You", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "You", "recog_valid": true, "glyph_recog_text": "You", "glyph_recog_ld": 1.0}, {"polygon": [[293, 360], [292, 359], [292, 387], [472, 389], [462, 354]], "text": "coaches", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Loaches", "recog_valid": false, "glyph_recog_text": "coaches", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027718.jpg", "caption": "a cat laying on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289866.jpg", "caption": "a man and woman sitting on a couch in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158794.jpg", "caption": "a group of people sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420964.jpg", "caption": "a red truck with an elephant on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000420976.jpg", "caption": "a ferry is traveling down the water near a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552052.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[303, 126], [303, 126], [371, 129], [371, 161], [300, 156]], "text": "60%", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "60%", "recog_valid": true, "glyph_recog_text": "60%", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552054.jpg", "caption": "a horse pulling a carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289912.jpg", "caption": "a computer desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421005.jpg", "caption": "a man on the beach holding a yellow kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158863.jpg", "caption": "a truck driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552089.jpg", "caption": "a street sign with two street names", "annotations": [{"polygon": [[18, 12], [137, 40], [138, 71], [14, 44], [13, 15]], "text": "SKIDMORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "SKIDMORE", "recog_valid": true, "glyph_recog_text": "SKIDMORE", "glyph_recog_ld": 1.0}, {"polygon": [[154, 41], [194, 53], [197, 69], [192, 81], [157, 74], [150, 63], [150, 49]], "text": "OLD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OLD", "recog_valid": true, "glyph_recog_text": "OLD", "glyph_recog_ld": 1.0}, {"polygon": [[201, 54], [269, 70], [267, 98], [206, 85]], "text": "TOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TOWN", "recog_valid": true, "glyph_recog_text": "TOWN", "glyph_recog_ld": 1.0}, {"polygon": [[232, 24], [268, 33], [271, 52], [264, 62], [231, 53]], "text": "200", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "200", "recog_valid": true, "glyph_recog_text": "200", "glyph_recog_ld": 1.0}, {"polygon": [[276, 72], [368, 92], [368, 121], [277, 101]], "text": "HISTORIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HISTORIC", "recog_valid": true, "glyph_recog_text": "HISTORIC", "glyph_recog_ld": 1.0}, {"polygon": [[374, 91], [374, 125], [453, 141], [455, 114]], "text": "DISTRICT", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "DISTRICT", "recog_valid": true, "glyph_recog_text": "DISTRICT", "glyph_recog_ld": 1.0}, {"polygon": [[403, 183], [399, 253], [449, 251], [453, 190]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "0H", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[406, 281], [392, 384], [452, 374], [504, 359], [504, 338], [495, 259]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}, {"polygon": [[183, 336], [191, 428], [338, 396], [353, 372], [355, 329], [341, 300], [320, 304]], "text": "THIRD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THIRD", "recog_valid": true, "glyph_recog_text": "THIRD", "glyph_recog_ld": 1.0}, {"polygon": [[53, 365], [49, 455], [53, 459], [119, 448], [129, 349]], "text": "SW", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "言", "recog_valid": false, "glyph_recog_text": "0三", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[191, 140], [197, 236], [314, 256], [307, 161]], "text": "PINE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PINE", "recog_valid": true, "glyph_recog_text": "PINE", "glyph_recog_ld": 1.0}, {"polygon": [[17, 106], [15, 209], [100, 224], [109, 123]], "text": "SW", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SW", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289962.jpg", "caption": "a young boy walking under a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000289968.jpg", "caption": "a black and white photo of a stop sign", "annotations": [{"polygon": [[161, 147], [151, 265], [313, 346], [323, 211]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027826.jpg", "caption": "a woman with a backpack and luggage at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552114.jpg", "caption": "a colorful container of food", "annotations": [{"polygon": [[242, 269], [232, 278], [264, 311], [274, 301]], "text": "BLACK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BLACK", "recog_valid": true, "glyph_recog_text": "BLACK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000158915.jpg", "caption": "a man riding a bike down a city street at dusk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027844.jpg", "caption": "a bus stop sign is surrounded by ivy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552142.jpg", "caption": "a cat laying on top of a refrigerator", "annotations": [{"polygon": [[249, 300], [250, 357], [408, 351], [408, 293]], "text": "GATO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GATO", "recog_valid": true, "glyph_recog_text": "GATO", "glyph_recog_ld": 1.0}, {"polygon": [[106, 394], [106, 394], [128, 414], [95, 456], [74, 438]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "E", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027873.jpg", "caption": "a narrow street with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552170.jpg", "caption": "a motorcycle parked on a dirt road with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421105.jpg", "caption": "a red and white airplane with a propeller on it", "annotations": [{"polygon": [[183, 282], [178, 301], [257, 318], [264, 301]], "text": "SPARKY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPARKY", "recog_valid": true, "glyph_recog_text": "SPARKY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027890.jpg", "caption": "a large truck driving down a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421106.jpg", "caption": "a steam train traveling down the tracks with smoke coming out of the engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421108.jpg", "caption": "a woman sitting on the floor with her children", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421134.jpg", "caption": "a busy street with people walking and driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290072.jpg", "caption": "two women laying on the beach with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552245.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552254.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[120, 128], [120, 128], [155, 135], [155, 182], [124, 179]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "的", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421193.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000027977.jpg", "caption": "a garage door with a stop sign on it", "annotations": [{"polygon": [[43, 92], [38, 127], [81, 140], [86, 105]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421202.jpg", "caption": "a train is parked at a train station", "annotations": [{"polygon": [[197, 479], [214, 488], [244, 461], [230, 454]], "text": "10-1", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "10-1", "recog_valid": true, "glyph_recog_text": "10-1", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159069.jpg", "caption": "a woman walking a dog and a red scooter", "annotations": [{"polygon": [[471, 179], [472, 207], [508, 215], [513, 189], [512, 177]], "text": "KAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IKAV", "recog_valid": false, "glyph_recog_text": "KAM", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[469, 141], [470, 178], [510, 178], [511, 148], [494, 135]], "text": "OAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MR", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159070.jpg", "caption": "a horse is riding in the field has texts", "annotations": [{"polygon": [[127, 472], [124, 495], [389, 488], [388, 465]], "text": "www.Sportex.net", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "www.sporlcXnet", "recog_valid": false, "glyph_recog_text": "www.Sportex.net", "glyph_recog_ld": 0.6666668888887407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028012.jpg", "caption": "a woman sitting in a chair looking at her cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421228.jpg", "caption": "a young girl riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552305.jpg", "caption": "the cobblestone pub in dublin, ireland", "annotations": [{"polygon": [[393, 196], [395, 226], [484, 216], [480, 189]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290168.jpg", "caption": "a green bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290174.jpg", "caption": "a man in a reflective vest standing in the middle of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028044.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159127.jpg", "caption": "a black cat laying on a desk next to a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290201.jpg", "caption": "a woman sitting at a table with a hot dog and chips", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552346.jpg", "caption": "a group of people on snowboards in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552348.jpg", "caption": "a train crossing over a river in front of a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421274.jpg", "caption": "a train on the tracks with a blue and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028072.jpg", "caption": "a boy playing tennis on a court with balls", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421315.jpg", "caption": "a man and woman cutting a cake at a party", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028103.jpg", "caption": "a man playing tennis on a court", "annotations": [{"polygon": [[53, 36], [52, 94], [-2, 93], [-2, 37]], "text": "U", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "U", "recog_valid": true, "glyph_recog_text": "U", "glyph_recog_ld": 1.0}, {"polygon": [[103, 36], [103, 98], [297, 98], [295, 39]], "text": "FedEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FedEx", "recog_valid": true, "glyph_recog_text": "FedEx", "glyph_recog_ld": 1.0}, {"polygon": [[403, 36], [403, 98], [513, 97], [514, 34]], "text": "Fed", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Fed", "recog_valid": true, "glyph_recog_text": "Fed", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421325.jpg", "caption": "a sign on a building has texts", "annotations": [{"polygon": [[189, 95], [187, 120], [193, 121], [207, 111], [238, 96], [241, 88], [240, 80], [234, 73], [224, 71], [195, 88]], "text": "freddie", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Seatde", "recog_valid": false, "glyph_recog_text": "freddie", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[239, 102], [241, 118], [254, 112], [272, 109], [297, 100], [311, 85], [317, 77], [313, 54], [290, 66], [265, 84], [249, 88], [245, 93]], "text": "Dorothys", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Dotoiys", "recog_valid": false, "glyph_recog_text": "Dorothys", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[189, 125], [187, 157], [313, 144], [316, 111]], "text": "RESTAURANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RESTAURANT", "recog_valid": true, "glyph_recog_text": "RESTAURANT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421341.jpg", "caption": "a black and white photo of a bench on a sidewalk", "annotations": [{"polygon": [[441, 381], [485, 339], [494, 347], [451, 391]], "text": "SEBASTIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SERASTIAN", "recog_valid": false, "glyph_recog_text": "SEBASTIAN", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552420.jpg", "caption": "a boy jumping up to catch a frisbee on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421367.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[93, 236], [92, 277], [205, 278], [207, 231]], "text": "Gali", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gali", "recog_valid": true, "glyph_recog_text": "Gali", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552440.jpg", "caption": "a man holding a cell phone with a text message on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552453.jpg", "caption": "a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290320.jpg", "caption": "a man in a red hat riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552472.jpg", "caption": "a woman standing at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159280.jpg", "caption": "a dog sitting on a motorcycle in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159285.jpg", "caption": "a person holding a hot dog with mustard on it", "annotations": [{"polygon": [[464, 198], [468, 228], [437, 247], [357, 240], [352, 202], [437, 211]], "text": "Tresh", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "(seij", "recog_valid": false, "glyph_recog_text": "Tresh", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[464, 258], [457, 304], [430, 317], [378, 312], [347, 288], [373, 270], [435, 275]], "text": "N0T", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LON", "recog_valid": false, "glyph_recog_text": "NOT", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552518.jpg", "caption": "people walking down the street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552520.jpg", "caption": "a refrigerator with food and drinks inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290379.jpg", "caption": "a man walks past the bitter end restaurant on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028251.jpg", "caption": "a large elephant statue in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159331.jpg", "caption": "a man is cutting a cake in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421488.jpg", "caption": "three men in suits and ties cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421491.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028276.jpg", "caption": "a dog wearing a red sweater standing next to a door", "annotations": [{"polygon": [[9, 121], [17, 139], [54, 115], [51, 102]], "text": "Cyc", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Cycrolu", "recog_valid": false, "glyph_recog_text": "Cyc", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028288.jpg", "caption": "a hot dog and a drink on a table", "annotations": [{"polygon": [[144, 231], [167, 262], [155, 266], [128, 238]], "text": "SCHEDULE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3N3KX", "recog_valid": false, "glyph_recog_text": "SEOPRCCAR", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028304.jpg", "caption": "a person holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028317.jpg", "caption": "a car parked in the snow next to a stop sign", "annotations": [{"polygon": [[24, 162], [112, 163], [110, 147], [126, 146], [126, 122], [24, 118]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421533.jpg", "caption": "a clock, a clock face, and a clock face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421539.jpg", "caption": "a street sign with a sticker on it", "annotations": [{"polygon": [[69, 78], [72, 102], [164, 95], [164, 72]], "text": "MONDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "MONDAY", "recog_valid": true, "glyph_recog_text": "MONDAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159398.jpg", "caption": "a group of people playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028329.jpg", "caption": "a small vase with a single flower sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552630.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421562.jpg", "caption": "a bed with a blanket and a window", "annotations": [{"polygon": [[220, 283], [223, 305], [270, 293], [268, 284], [260, 273]], "text": "ERIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LERIA", "recog_valid": false, "glyph_recog_text": "ERIA", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290498.jpg", "caption": "a woman riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421588.jpg", "caption": "a man sitting at a desk with his arms crossed", "annotations": [{"polygon": [[288, 102], [289, 136], [487, 137], [484, 103]], "text": "QUARTUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "QUARTUS", "recog_valid": true, "glyph_recog_text": "QUARTUS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290524.jpg", "caption": "a computer monitor on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159453.jpg", "caption": "a cat laying on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159456.jpg", "caption": "a clock with roman numerals on it", "annotations": [{"polygon": [[232, 201], [239, 216], [245, 214], [254, 213], [262, 212], [271, 215], [279, 219], [284, 223], [290, 233], [302, 225], [298, 220], [291, 210], [286, 205], [279, 201], [272, 198], [262, 196], [251, 196], [237, 199]], "text": "CACHEUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CACHEUP", "recog_valid": false, "glyph_recog_text": "CACHEUR", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028385.jpg", "caption": "a hospital room with a clock and medical equipment", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421602.jpg", "caption": "a yellow bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159472.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421618.jpg", "caption": "a woman sitting on a bench with a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159482.jpg", "caption": "a group of children on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159495.jpg", "caption": "a man standing under an umbrella with boxes of bananas", "annotations": [{"polygon": [[149, 81], [236, 137], [238, 159], [232, 157], [213, 147], [197, 137], [173, 117], [149, 111]], "text": "LARA BAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LANLBLE", "recog_valid": false, "glyph_recog_text": "LARA BAR", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552717.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028451.jpg", "caption": "a banana and two lemons on a scale", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290600.jpg", "caption": "a white van parked next to a bike rack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028480.jpg", "caption": "a group of people standing around a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290628.jpg", "caption": "a yellow bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159567.jpg", "caption": "a purple double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552791.jpg", "caption": "a white house with a clock on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159582.jpg", "caption": "a blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290656.jpg", "caption": "a group of people standing in a line on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421735.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028523.jpg", "caption": "a parking garage with many motorcycles and people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290668.jpg", "caption": "a computer monitor sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421744.jpg", "caption": "a large jet airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159626.jpg", "caption": "a shelf with a clock and a vase on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421773.jpg", "caption": "a man sitting on a bench with a bicycle", "annotations": [{"polygon": [[11, 265], [35, 321], [27, 324], [3, 269]], "text": "FUJI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "195-", "recog_valid": false, "glyph_recog_text": "25:", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421777.jpg", "caption": "a view of a busy street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159634.jpg", "caption": "a truck with a kite flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290724.jpg", "caption": "a group of people standing on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552870.jpg", "caption": "an old black and white photo of a street sign", "annotations": [{"polygon": [[62, 447], [69, 479], [158, 479], [134, 450]], "text": "311", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "31", "recog_valid": false, "glyph_recog_text": "311", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552895.jpg", "caption": "a woman with a suitcase and a box", "annotations": [{"polygon": [[288, 442], [293, 448], [321, 422], [316, 415]], "text": "DEM", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DEAI", "recog_valid": false, "glyph_recog_text": "红去坠", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159683.jpg", "caption": "a train at a train station with people walking by", "annotations": [{"polygon": [[271, 210], [271, 209], [312, 219], [307, 265], [268, 249]], "text": "82303", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "鑫", "recog_valid": false, "glyph_recog_text": "a N m", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290761.jpg", "caption": "a street with a street light and a building", "annotations": [{"polygon": [[119, 341], [113, 357], [122, 375], [175, 373], [190, 363], [175, 339]], "text": "Ne", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4 NO", "recog_valid": false, "glyph_recog_text": "N e", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[123, 262], [103, 292], [117, 302], [123, 314], [166, 328], [203, 324], [204, 298], [157, 277]], "text": "RICH HAWT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2一", "recog_valid": false, "glyph_recog_text": "RICH HAWT", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[231, 232], [237, 262], [248, 257], [262, 256], [280, 262], [285, 265], [288, 227], [271, 223], [238, 229]], "text": "OLKA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUNE", "recog_valid": false, "glyph_recog_text": "OLKA", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159714.jpg", "caption": "sheep and sheepdog show at the fair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028642.jpg", "caption": "navy sailors use their cell phones during a ceremony", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421873.jpg", "caption": "a man skateboarding on a sidewalk outside of a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028658.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000552961.jpg", "caption": "an old motorcycle is on display in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028674.jpg", "caption": "a man sitting at a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159777.jpg", "caption": "a clock on a building in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290856.jpg", "caption": "a street sign on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421944.jpg", "caption": "a toothbrush and toothpaste in a glass", "annotations": [{"polygon": [[281, 294], [291, 288], [339, 366], [325, 371]], "text": "Colgate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "F4e61o5", "recog_valid": false, "glyph_recog_text": "Colgwte", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028742.jpg", "caption": "a fire hydrant and a brick wall", "annotations": [{"polygon": [[117, 129], [226, 123], [325, 130], [383, 121], [375, 54], [315, 60], [291, 55], [278, 59], [223, 48], [220, 61], [167, 65], [150, 55], [142, 65], [112, 76]], "text": "HYDRANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HYDBANT", "recog_valid": false, "glyph_recog_text": "HYDRANT", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553043.jpg", "caption": "a double decker bus parked in a parking lot", "annotations": [{"polygon": [[253, 242], [250, 263], [282, 255], [284, 232]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUS", "recog_valid": true, "glyph_recog_text": "BUS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553046.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553056.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421995.jpg", "caption": "a delta airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000421996.jpg", "caption": "an old fashioned stove with a pot on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290935.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422008.jpg", "caption": "an antique apples stand with baskets of apples", "annotations": [{"polygon": [[212, 64], [207, 81], [204, 92], [220, 103], [230, 93], [270, 96], [283, 92], [284, 69], [238, 66]], "text": "ANTIQUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ANIIQUE", "recog_valid": false, "glyph_recog_text": "ANTIQUE", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028806.jpg", "caption": "a sign for garage on the side of a building", "annotations": [{"polygon": [[224, 188], [225, 225], [226, 228], [390, 221], [388, 183], [224, 187]], "text": "GARAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOIRAIGE", "recog_valid": false, "glyph_recog_text": "GARAGE", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[86, 356], [86, 392], [97, 387], [114, 384], [135, 381], [153, 382], [153, 358], [86, 356]], "text": "PRATTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PRATTS", "recog_valid": true, "glyph_recog_text": "PRATTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290952.jpg", "caption": "a train traveling down the tracks with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159887.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553116.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000290974.jpg", "caption": "inca cola and bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422061.jpg", "caption": "two people on horses in front of a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422067.jpg", "caption": "a black and white photo of a man and woman at a food stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159926.jpg", "caption": "a bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028855.jpg", "caption": "two children and a woman on a scooter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291004.jpg", "caption": "a blue comforter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553149.jpg", "caption": "a vase with flowers in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553150.jpg", "caption": "a large airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291016.jpg", "caption": "a large number of umbrellas hanging from the ceiling", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159948.jpg", "caption": "a man standing next to a sign that says no zone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028889.jpg", "caption": "two men standing next to a parking meter", "annotations": [{"polygon": [[416, 219], [416, 219], [452, 221], [451, 259], [412, 257]], "text": "65%", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "65%", "recog_valid": true, "glyph_recog_text": ":", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[457, 220], [494, 220], [491, 259], [455, 257]], "text": "OFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OFF", "recog_valid": true, "glyph_recog_text": ":", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[237, 216], [237, 216], [274, 216], [273, 256], [239, 253]], "text": "14K", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "14K", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[277, 218], [412, 220], [407, 259], [275, 254]], "text": "KRY 6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RY", "recog_valid": false, "glyph_recog_text": "KRY6", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159962.jpg", "caption": "a red truck is parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553184.jpg", "caption": "a stop sign on a sidewalk next to a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159971.jpg", "caption": "a person sitting on a floor with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291056.jpg", "caption": "a street sign for retail clearance outlet on a building", "annotations": [{"polygon": [[195, 135], [201, 144], [210, 138], [220, 132], [230, 130], [243, 131], [257, 139], [266, 149], [273, 139], [264, 131], [250, 122], [234, 119], [220, 121], [203, 129]], "text": "LOUMARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "60M4", "recog_valid": false, "glyph_recog_text": "LOUMARK", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[225, 325], [223, 384], [255, 422], [273, 418], [270, 368]], "text": "Clothing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Clothing", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[193, 195], [184, 205], [195, 215], [211, 224], [232, 225], [246, 224], [247, 219], [242, 211], [226, 214], [213, 213]], "text": "CLOTHING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOTHBO", "recog_valid": false, "glyph_recog_text": "CLOTHING", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000028917.jpg", "caption": "a stop sign is reflected in a window", "annotations": [{"polygon": [[343, 179], [343, 213], [418, 216], [416, 182]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "90T2", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422133.jpg", "caption": "a group of horses standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000159993.jpg", "caption": "a man is swinging a bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160009.jpg", "caption": "a group of people walking down the street", "annotations": [{"polygon": [[137, 118], [139, 172], [167, 169], [164, 113]], "text": "TJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "香飘中", "recog_valid": false, "glyph_recog_text": "F-", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160028.jpg", "caption": "a model train is driving through a tunnel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160034.jpg", "caption": "a sign has texts on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422198.jpg", "caption": "a tv screen on a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422206.jpg", "caption": "a teddy bear with pom poms in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291141.jpg", "caption": "a man holding a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291149.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553297.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029018.jpg", "caption": "a toy train is on the tracks with a small elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160102.jpg", "caption": "two women playing wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160103.jpg", "caption": "a large brick building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553336.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160137.jpg", "caption": "a street sign with the words pennsylvania avenue", "annotations": [{"polygon": [[32, 270], [32, 270], [70, 267], [75, 236], [32, 237]], "text": "NW", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NW", "recog_valid": true, "glyph_recog_text": "NW", "glyph_recog_ld": 1.0}, {"polygon": [[149, 272], [149, 272], [422, 262], [412, 211], [151, 223]], "text": "PENNSYLVANIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PENNSYLVANIA", "recog_valid": true, "glyph_recog_text": "PENNSYLVANIA", "glyph_recog_ld": 1.0}, {"polygon": [[435, 251], [435, 251], [489, 248], [487, 220], [442, 220]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422283.jpg", "caption": "a group of women playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553373.jpg", "caption": "a woman is standing next to a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160159.jpg", "caption": "a person is doing a trick on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029096.jpg", "caption": "a white suv with a surfboard on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160171.jpg", "caption": "the royal horse guards at the royal horseguards parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029104.jpg", "caption": "a man in a chef hat is preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160181.jpg", "caption": "a young boy eating a donut in a restaurant", "annotations": [{"polygon": [[254, 367], [275, 372], [289, 374], [304, 376], [320, 375], [329, 375], [340, 401], [329, 401], [320, 402], [304, 402], [288, 401], [272, 399], [258, 397], [246, 394], [234, 391]], "text": "WARS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WAPS", "recog_valid": false, "glyph_recog_text": "WARS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553402.jpg", "caption": "a young boy holding a donut with icing", "annotations": [{"polygon": [[306, 248], [350, 219], [382, 244], [332, 280]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2127", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029116.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553407.jpg", "caption": "a couple of buses are parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422336.jpg", "caption": "a man and woman sitting at a table in a library", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422343.jpg", "caption": "a group of remote controls are shown in different colors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291285.jpg", "caption": "a pizza with ham, tomatoes and arugula", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160214.jpg", "caption": "a kitchen with a large counter and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029153.jpg", "caption": "a group of people on top of a truck", "annotations": [{"polygon": [[121, 278], [124, 324], [161, 310], [157, 272], [131, 287], [130, 273]], "text": "K LINE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "号", "recog_valid": false, "glyph_recog_text": "¥-", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029155.jpg", "caption": "a red and white sign", "annotations": [{"polygon": [[199, 244], [201, 275], [283, 272], [280, 242]], "text": "LANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LANE", "recog_valid": true, "glyph_recog_text": "LANE", "glyph_recog_ld": 1.0}, {"polygon": [[124, 293], [125, 322], [246, 320], [245, 288]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}, {"polygon": [[132, 338], [134, 370], [237, 365], [235, 334]], "text": "AHEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHEAD", "recog_valid": true, "glyph_recog_text": "AHEAD", "glyph_recog_ld": 1.0}, {"polygon": [[149, 166], [104, 216], [114, 233], [152, 185]], "text": "PHAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PHAT", "recog_valid": true, "glyph_recog_text": "PHAT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160229.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553446.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160238.jpg", "caption": "the old west saloon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422389.jpg", "caption": "a pizza on a wooden table", "annotations": [{"polygon": [[493, 208], [480, 251], [513, 259], [512, 215], [512, 215]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "中", "recog_valid": false, "glyph_recog_text": "节", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291319.jpg", "caption": "a baseball player standing on a base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291320.jpg", "caption": "a white and red bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422393.jpg", "caption": "a street sign and traffic lights on a street", "annotations": [{"polygon": [[173, 237], [172, 262], [307, 270], [306, 241], [172, 236]], "text": "Burwell", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Burwell", "recog_valid": true, "glyph_recog_text": "Burwell", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160255.jpg", "caption": "a man sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160273.jpg", "caption": "two men in graduation gowns and caps are looking at their phones", "annotations": [{"polygon": [[0, 333], [76, 334], [74, 364], [0, 364]], "text": "T H E ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "THL", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[320, 336], [427, 338], [427, 366], [320, 364]], "text": "MILY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AILY", "recog_valid": false, "glyph_recog_text": "MILY", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[453, 337], [512, 338], [513, 367], [453, 366]], "text": "H O ", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HO", "recog_valid": false, "glyph_recog_text": "HO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029214.jpg", "caption": "a yellow and black train with a crane on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553509.jpg", "caption": "a clock with a palm tree in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160294.jpg", "caption": "a woman wearing red and white skiing gear", "annotations": [{"polygon": [[221, 298], [241, 290], [257, 318], [235, 326]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "12", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[334, 492], [335, 501], [378, 473], [377, 464]], "text": "FISCHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SISEHER", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160298.jpg", "caption": "a small electronic device sitting on top of a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160302.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553531.jpg", "caption": "a green bus parked in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553534.jpg", "caption": "a stop sign and a traffic cone on a street", "annotations": [{"polygon": [[285, 80], [272, 132], [289, 139], [388, 149], [411, 121], [414, 101], [407, 95]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422469.jpg", "caption": "a parking meter on the side of the road", "annotations": [{"polygon": [[220, 395], [167, 403], [198, 485], [267, 470], [225, 394]], "text": "CLEAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CLEAR", "recog_valid": true, "glyph_recog_text": "CLEAR", "glyph_recog_ld": 1.0}, {"polygon": [[199, 352], [149, 359], [165, 398], [219, 390]], "text": "KEEP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "卓", "recog_valid": false, "glyph_recog_text": "KEEP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029254.jpg", "caption": "a yellow and blue bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160325.jpg", "caption": "a horse pulling a carriage in front of a castle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160331.jpg", "caption": "a stop sign on a street corner with a house in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422479.jpg", "caption": "a stop sign on a street in a city", "annotations": [{"polygon": [[39, 200], [238, 211], [235, 287], [35, 284]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553554.jpg", "caption": "a cutting board with oranges and a juicer on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291417.jpg", "caption": "a man using a laptop computer at a booth", "annotations": [{"polygon": [[61, 97], [62, 144], [197, 152], [196, 108]], "text": "production", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "production", "recog_valid": true, "glyph_recog_text": "productior", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553563.jpg", "caption": "a baseball player swinging a bat", "annotations": [{"polygon": [[250, 224], [245, 236], [247, 240], [251, 233], [257, 230], [282, 222], [283, 217], [279, 215], [279, 211], [272, 213], [270, 212], [267, 210], [263, 215]], "text": "31", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "上", "recog_valid": false, "glyph_recog_text": "3 1", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029287.jpg", "caption": "a green sign with a stop sign next to a sign that says south 5", "annotations": [{"polygon": [[222, 181], [220, 239], [254, 235], [249, 178]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 1.0}, {"polygon": [[305, 139], [305, 172], [428, 153], [429, 122], [307, 138]], "text": "SOUTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOUTH", "recog_valid": true, "glyph_recog_text": "SOUTH", "glyph_recog_ld": 1.0}, {"polygon": [[237, 292], [235, 330], [385, 314], [381, 284], [306, 282], [237, 290]], "text": "SALEM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Salem", "recog_valid": false, "glyph_recog_text": "SALEM", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[202, 356], [202, 389], [293, 380], [294, 347], [205, 356]], "text": "NEXT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEXT", "recog_valid": true, "glyph_recog_text": "NEXT", "glyph_recog_ld": 1.0}, {"polygon": [[327, 345], [327, 378], [422, 368], [418, 335], [329, 346]], "text": "LEFT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEFT", "recog_valid": true, "glyph_recog_text": "LEFT", "glyph_recog_ld": 1.0}, {"polygon": [[63, 370], [60, 404], [68, 403], [114, 398], [116, 364]], "text": "CLINTONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "CENTON", "recog_valid": false, "glyph_recog_text": "CLINTONS", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[74, 326], [65, 345], [65, 357], [96, 363], [98, 355], [98, 330]], "text": "68", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "68", "recog_valid": true, "glyph_recog_text": "68", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291438.jpg", "caption": "a laptop computer sitting on a desk", "annotations": [{"polygon": [[100, 379], [105, 385], [165, 356], [159, 352], [100, 379]], "text": "Gehirncoach", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Gebisnsonch", "recog_valid": false, "glyph_recog_text": "Eet", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160380.jpg", "caption": "a man sleeping on a bench outside a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029328.jpg", "caption": "a woman holding a can of soda and a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291484.jpg", "caption": "a restaurant with a chalkboard menu and a chalkboard menu", "annotations": [{"polygon": [[359, 235], [362, 252], [418, 236], [414, 218], [373, 226]], "text": "ROBEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "H ben", "recog_valid": false, "glyph_recog_text": "ROBEN", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[387, 308], [393, 323], [423, 311], [418, 292]], "text": "Tabouli 8", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Tabali", "recog_valid": false, "glyph_recog_text": "ToOyi万", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160423.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291503.jpg", "caption": "a stop sign and a couple of people walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553659.jpg", "caption": "a cement mixer truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160452.jpg", "caption": "a train with graffiti on it is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291527.jpg", "caption": "a man skiing down a slope", "annotations": [{"polygon": [[23, 275], [130, 243], [138, 272], [29, 302]], "text": "DKB.de", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DKB.de", "recog_valid": true, "glyph_recog_text": "DKB.de", "glyph_recog_ld": 1.0}, {"polygon": [[246, 220], [334, 201], [343, 223], [248, 243]], "text": "XDrive", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "xDrive", "recog_valid": false, "glyph_recog_text": "XDrive", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[398, 188], [403, 212], [496, 195], [491, 173]], "text": "xDrive", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "xDrive", "recog_valid": true, "glyph_recog_text": "xOrive", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553671.jpg", "caption": "a table with a bunch of fruits and vegetables", "annotations": [{"polygon": [[311, 445], [313, 470], [413, 463], [408, 437]], "text": "Organic", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Organie", "recog_valid": false, "glyph_recog_text": "Organic", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160459.jpg", "caption": "a woman holding a coffee cup and a doughnut", "annotations": [{"polygon": [[63, 66], [64, 87], [93, 96], [94, 85], [88, 82], [86, 73], [84, 73]], "text": "Market", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Marks", "recog_valid": false, "glyph_recog_text": "ftarke t", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422608.jpg", "caption": "a horse eating hay in a fenced in area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553700.jpg", "caption": "a group of people on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160490.jpg", "caption": "a motorcycle parked on a dirt road near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029426.jpg", "caption": "a stop sign, a stop sign, a stop sign, a stop sign, a stop sign, a stop sign, a stop sign, a stop sign,", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291574.jpg", "caption": "a table with a cup of coffee and a stuffed animal", "annotations": [{"polygon": [[319, 252], [318, 312], [353, 319], [359, 261]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "cm", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553735.jpg", "caption": "a blue and yellow double decker bus on a city street", "annotations": [{"polygon": [[7, 182], [7, 204], [111, 214], [111, 199]], "text": "MPHORN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CTAMPHORN", "recog_valid": false, "glyph_recog_text": "MPHORN", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029456.jpg", "caption": "a man standing in front of a bus with people walking behind him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029457.jpg", "caption": "i'm telling you, i'm not telling you", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553753.jpg", "caption": "a train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160549.jpg", "caption": "a small plane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553777.jpg", "caption": "three men posing for a photo on a ski slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422705.jpg", "caption": "a bunch of bananas in a basket", "annotations": [{"polygon": [[182, 301], [339, 308], [345, 313], [343, 331], [339, 335], [181, 328]], "text": "BANANAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BANANAS", "recog_valid": true, "glyph_recog_text": "BANANAS", "glyph_recog_ld": 1.0}, {"polygon": [[230, 333], [332, 338], [346, 356], [346, 379], [327, 396], [269, 395], [224, 392], [205, 374], [209, 362], [215, 340]], "text": "89", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "89c", "recog_valid": false, "glyph_recog_text": "8 9", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[429, 73], [493, 53], [494, 68], [431, 87]], "text": "TIPPIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "TIPPIA", "recog_valid": false, "glyph_recog_text": "TIPPIN", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029508.jpg", "caption": "a plate of food on a counter next to a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029509.jpg", "caption": "a dog is playing with a frisbee in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553800.jpg", "caption": "a woman standing on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291658.jpg", "caption": "a parking meter on the side of a street", "annotations": [{"polygon": [[385, 73], [388, 110], [448, 65], [446, 18], [439, 20], [430, 39]], "text": "band", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "7", "recog_valid": false, "glyph_recog_text": "band", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029525.jpg", "caption": "an old black and white photo of a construction site", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160597.jpg", "caption": "a police officer on a motorcycle in a busy city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029538.jpg", "caption": "a black and white photo of a man standing next to a bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160610.jpg", "caption": "a blue bus driving down a street with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160614.jpg", "caption": "a man laying in bed reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553851.jpg", "caption": "a stop sign on a street corner in front of a brick building", "annotations": [{"polygon": [[66, 152], [127, 167], [122, 191], [59, 177]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029564.jpg", "caption": "a crocheted hat with a red and blue top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422804.jpg", "caption": "a clock with a stuffed animal on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422816.jpg", "caption": "two teddy bears in wedding attire sitting on top of tea cups", "annotations": [{"polygon": [[110, 306], [124, 384], [257, 357], [243, 286]], "text": "Scott", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Scott", "recog_valid": true, "glyph_recog_text": "Scott", "glyph_recog_ld": 1.0}, {"polygon": [[344, 300], [343, 351], [412, 360], [440, 339], [456, 309], [453, 274]], "text": "Angela", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Angel", "recog_valid": false, "glyph_recog_text": "Angela", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029602.jpg", "caption": "a man and his dog in a canoe on a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029607.jpg", "caption": "a woman cutting a cake with a child and another person", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553896.jpg", "caption": "two people on surfboards riding waves in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422828.jpg", "caption": "a hot dog with cheese and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160691.jpg", "caption": "a person walking on the snow covered road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000553908.jpg", "caption": "an old black and white photo of a man in a suit and hat standing next to an airplane", "annotations": [{"polygon": [[69, 306], [202, 334], [204, 391], [60, 366]], "text": "POR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POR", "recog_valid": true, "glyph_recog_text": "POR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160701.jpg", "caption": "a stop sign and a street sign on a pole", "annotations": [{"polygon": [[272, 231], [310, 212], [310, 195], [272, 214]], "text": "070A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "02704", "recog_valid": false, "glyph_recog_text": "070A", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[57, 333], [145, 329], [160, 283], [60, 289]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[289, 312], [289, 312], [468, 261], [467, 210], [292, 273]], "text": "PATTISON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PATTISON", "recog_valid": true, "glyph_recog_text": "PATTISON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291773.jpg", "caption": "a stop sign and street signs on a pole", "annotations": [{"polygon": [[229, 322], [225, 366], [228, 376], [235, 375], [241, 359], [245, 365], [251, 363], [282, 342], [294, 294], [292, 277], [290, 276]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[219, 142], [218, 159], [218, 160], [279, 182], [277, 164]], "text": "PARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARK", "recog_valid": true, "glyph_recog_text": "PARK", "glyph_recog_ld": 1.0}, {"polygon": [[289, 166], [283, 183], [323, 198], [324, 180]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NE", "recog_valid": false, "glyph_recog_text": "AVE", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[199, 124], [199, 131], [261, 155], [262, 148]], "text": "PARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HISTORIC", "recog_valid": false, "glyph_recog_text": ":.h、", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[278, 154], [277, 162], [339, 184], [340, 179]], "text": "DOWNTOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UOWNTOWN", "recog_valid": false, "glyph_recog_text": "R车", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[249, 131], [249, 133], [264, 135], [271, 132], [310, 97], [310, 78]], "text": "ST S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WSTS", "recog_valid": false, "glyph_recog_text": "STS", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[238, 135], [241, 137], [272, 111], [270, 107]], "text": "TORIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "vroRie", "recog_valid": false, "glyph_recog_text": ".11", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[273, 103], [273, 108], [322, 67], [321, 61]], "text": "DOWNTOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TOWNTOEN", "recog_valid": false, "glyph_recog_text": "LtV4l652", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291780.jpg", "caption": "a large blue and white airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029643.jpg", "caption": "a cat is sleeping on a desk", "annotations": [{"polygon": [[0, 420], [11, 427], [90, 373], [77, 366]], "text": "Touche", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Tonshe", "recog_valid": false, "glyph_recog_text": "Tauche", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[189, 417], [196, 420], [235, 382], [228, 378]], "text": "DEPRESSIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "GEPRESSVE", "recog_valid": false, "glyph_recog_text": "RHOOW", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[204, 425], [234, 392], [241, 395], [210, 427]], "text": "ARTISTIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LOTASTLE", "recog_valid": false, "glyph_recog_text": "和", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[367, 411], [345, 448], [326, 447], [351, 406]], "text": "field", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ifield", "recog_valid": false, "glyph_recog_text": "field", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422876.jpg", "caption": "a woman sitting at a table with a pizza", "annotations": [{"polygon": [[176, 282], [207, 282], [213, 320], [194, 316], [181, 309], [177, 298]], "text": "SAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[212, 283], [212, 283], [326, 273], [331, 286], [324, 308], [291, 318], [219, 320]], "text": "FRANCLSCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRANCISCO", "recog_valid": false, "glyph_recog_text": "FRANCLSCO", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[245, 348], [255, 341], [274, 342], [306, 332], [312, 342], [251, 362]], "text": "GODONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ho3", "recog_valid": false, "glyph_recog_text": "GODONS", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422878.jpg", "caption": "a man is cutting the skin off of a dead horse", "annotations": [{"polygon": [[159, 126], [155, 145], [167, 157], [188, 168], [202, 171], [217, 172], [233, 171], [233, 167], [208, 166], [200, 163], [191, 159], [183, 155], [176, 151], [168, 142], [158, 126]], "text": "France", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Frang", "recog_valid": false, "glyph_recog_text": "France", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422899.jpg", "caption": "a large airplane parked on a wet runway", "annotations": [{"polygon": [[63, 198], [69, 187], [95, 242], [87, 255]], "text": "excel", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ece", "recog_valid": false, "glyph_recog_text": "青关它告!", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291827.jpg", "caption": "two people on surfboards in the ocean at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029687.jpg", "caption": "a man in a black shirt and shorts is holding a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160773.jpg", "caption": "a baseball player swinging his bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291855.jpg", "caption": "a bike is on the back of a bus", "annotations": [{"polygon": [[231, 75], [235, 100], [362, 98], [367, 69]], "text": "TRANSIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TRANSIT", "recog_valid": true, "glyph_recog_text": "TRANSIT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029712.jpg", "caption": "a herd of sheep", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029715.jpg", "caption": "a man and woman standing outside a building with shopping bags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554010.jpg", "caption": "a blue and white bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291868.jpg", "caption": "a car is stopped at a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422953.jpg", "caption": "a large airplane on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160815.jpg", "caption": "a group of people walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160818.jpg", "caption": "a group of motorcyclists riding down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000422967.jpg", "caption": "a person holding a wii remote", "annotations": [{"polygon": [[69, 356], [69, 356], [89, 388], [106, 375], [83, 341]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Wii", "recog_valid": true, "glyph_recog_text": "Wii", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160824.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160844.jpg", "caption": "a woman holding skis on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160848.jpg", "caption": "a bicycle parked next to a boat in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554070.jpg", "caption": "a man on a skateboard doing a trick in a skate park", "annotations": [{"polygon": [[277, 209], [270, 227], [276, 245], [283, 245], [300, 237], [303, 229], [301, 217]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "B", "recog_valid": true, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554085.jpg", "caption": "a birthday cake with candles on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000291947.jpg", "caption": "president obama speaks at the white house press briefing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554104.jpg", "caption": "a man is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160893.jpg", "caption": "a man is cutting a piece of food with a knife", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423037.jpg", "caption": "a train traveling down a canal in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160899.jpg", "caption": "a group of women playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423052.jpg", "caption": "a street sign that says free cone on it", "annotations": [{"polygon": [[99, 94], [100, 142], [133, 142], [134, 93]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554138.jpg", "caption": "a man riding a horse in a rodeo arena", "annotations": [{"polygon": [[49, 300], [49, 269], [128, 269], [128, 300]], "text": "ARMY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ARMY", "recog_valid": true, "glyph_recog_text": "ARMY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000160932.jpg", "caption": "a group of men posing for a picture with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423083.jpg", "caption": "two computer monitors on a desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292051.jpg", "caption": "an apple tv is shown on a television screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029909.jpg", "caption": "a person is holding a toothbrush and toothpaste", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292058.jpg", "caption": "a group of people standing around a stop sign", "annotations": [{"polygon": [[408, 154], [406, 185], [446, 187], [447, 157]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[440, 127], [439, 152], [468, 158], [471, 132]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[346, 143], [346, 168], [406, 163], [407, 138]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOR", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161006.jpg", "caption": "a man on a horse and a dog in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000029936.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292081.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[92, 112], [81, 153], [265, 240], [265, 198]], "text": "AVENUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AMERICAS", "recog_valid": false, "glyph_recog_text": "AVENUE", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[110, 94], [106, 115], [178, 150], [180, 127]], "text": "AVENUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AVENUE", "recog_valid": true, "glyph_recog_text": "AVENUE", "glyph_recog_ld": 1.0}, {"polygon": [[213, 145], [218, 171], [245, 183], [245, 160]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[313, 53], [312, 80], [316, 97], [358, 125], [354, 84]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[378, 104], [383, 148], [414, 169], [417, 132]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[235, 399], [233, 467], [275, 460], [283, 392]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "0H", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[155, 418], [155, 476], [209, 470], [206, 404]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[104, 427], [109, 480], [134, 482], [143, 419]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[254, 267], [242, 323], [287, 345], [300, 286]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "AV", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[200, 240], [187, 300], [217, 310], [223, 292], [217, 243]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[230, 234], [227, 248], [258, 264], [261, 254]], "text": "Instoric", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0570905", "recog_valid": false, "glyph_recog_text": "fnstotk", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554232.jpg", "caption": "a man and woman standing next to an old train", "annotations": [{"polygon": [[176, 214], [175, 244], [245, 235], [245, 204]], "text": "2472", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2472", "recog_valid": true, "glyph_recog_text": "2472", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423162.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161019.jpg", "caption": "a yellow and blue double decker bus parked on grass", "annotations": [{"polygon": [[71, 245], [72, 218], [143, 174], [144, 213]], "text": "EASTBOURNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "YBORNE", "recog_valid": false, "glyph_recog_text": "EASTBOURNE", "glyph_recog_ld": 0.5000004999995}, {"polygon": [[153, 180], [155, 199], [185, 174], [182, 159]], "text": "FOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "peiargf", "recog_valid": false, "glyph_recog_text": "FOR", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161027.jpg", "caption": "a bike lane with a white line painted on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161028.jpg", "caption": "a man in red shirt playing ping pong with another man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292116.jpg", "caption": "a woman is playing tennis with two other people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423202.jpg", "caption": "a man holding a hot dog with pickles on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161062.jpg", "caption": "a large airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292136.jpg", "caption": "a bus and a bike ride down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292146.jpg", "caption": "a group of people are riding bicycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423247.jpg", "caption": "a woman is holding a horse while it is being pulled by a cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554336.jpg", "caption": "a person jumping in the air on skis", "annotations": [{"polygon": [[430, 363], [430, 396], [265, 395], [264, 363]], "text": "snowbird", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "snowbird", "recog_valid": true, "glyph_recog_text": "snowbird", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423290.jpg", "caption": "a soccer player kicking the ball on the field", "annotations": [{"polygon": [[172, 181], [170, 209], [202, 211], [204, 183]], "text": "29", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "29", "recog_valid": true, "glyph_recog_text": "29", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292226.jpg", "caption": "a group of cyclists racing down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161157.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292243.jpg", "caption": "a double decker bus on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030100.jpg", "caption": "a woman walking a bear on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292257.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423330.jpg", "caption": "a messy living room with a chair, table, and a window", "annotations": [{"polygon": [[99, 351], [133, 346], [137, 379], [102, 380]], "text": "GAP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GAP", "recog_valid": true, "glyph_recog_text": "GAP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161190.jpg", "caption": "a man sitting at a desk with his arms crossed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423341.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423343.jpg", "caption": "a laptop computer with a cell phone on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423349.jpg", "caption": "a clock on the side of a building in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554434.jpg", "caption": "a horse pulling a carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161229.jpg", "caption": "a sign that says heavy pedestrian traffic", "annotations": [{"polygon": [[130, 164], [132, 136], [331, 98], [331, 132], [132, 164]], "text": "CAUTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAUTION", "recog_valid": true, "glyph_recog_text": "CAUTION", "glyph_recog_ld": 1.0}, {"polygon": [[198, 209], [202, 182], [310, 166], [302, 196], [198, 208]], "text": "HEAVY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEAVY", "recog_valid": true, "glyph_recog_text": "HEAVY", "glyph_recog_ld": 1.0}, {"polygon": [[157, 252], [157, 226], [358, 199], [359, 229]], "text": "PEDESTRIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PEDESTRIAN", "recog_valid": true, "glyph_recog_text": "PEDESTRIAN", "glyph_recog_ld": 1.0}, {"polygon": [[181, 262], [317, 248], [318, 277], [185, 292], [181, 264]], "text": "TRAFFIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRAFFIC", "recog_valid": true, "glyph_recog_text": "TRAFFIC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554452.jpg", "caption": "a boy walking down a street with a kite", "annotations": [{"polygon": [[247, 174], [264, 168], [275, 165], [289, 165], [305, 173], [311, 179], [305, 196], [295, 188], [287, 187], [279, 187], [271, 187], [264, 191], [254, 193]], "text": "LONDON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1ON.DOy", "recog_valid": false, "glyph_recog_text": "LONDON", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554453.jpg", "caption": "a street with many motorcycles parked on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423383.jpg", "caption": "a man on a skateboard doing a trick", "annotations": [{"polygon": [[235, 464], [235, 495], [369, 496], [364, 466]], "text": "Harada", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "haeada", "recog_valid": false, "glyph_recog_text": "Harada", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554464.jpg", "caption": "a boy is swinging a baseball bat at a tee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554488.jpg", "caption": "a man standing next to a motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292346.jpg", "caption": "a sign on a brick wall", "annotations": [{"polygon": [[242, 314], [243, 356], [289, 357], [291, 314]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292351.jpg", "caption": "a man standing next to a motorcycle and camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030231.jpg", "caption": "a man holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423448.jpg", "caption": "a refrigerator and a washing machine in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030235.jpg", "caption": "a large passenger jet sitting on the tarmac", "annotations": [{"polygon": [[442, 184], [464, 215], [504, 212], [503, 181]], "text": "VIF", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "V", "recog_valid": false, "glyph_recog_text": "VIF", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423454.jpg", "caption": "a railroad crossing with traffic lights and a truck", "annotations": [{"polygon": [[394, 212], [425, 176], [430, 183], [401, 218], [396, 216]], "text": "RAIL ROAD CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CROSSING", "recog_valid": false, "glyph_recog_text": "ifneiuartp", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030239.jpg", "caption": "a baseball player in blue and white uniform pitching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292386.jpg", "caption": "a cutting board with a knife, a bowl of soup, and a measuring cup", "annotations": [{"polygon": [[357, 197], [356, 223], [397, 228], [400, 211]], "text": "Hot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ho", "recog_valid": false, "glyph_recog_text": "Hot", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554541.jpg", "caption": "a man in white playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292432.jpg", "caption": "two trains are parked at a station with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161367.jpg", "caption": "a large group of people sitting at tables in a large room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554587.jpg", "caption": "a group of people in pink shirts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292444.jpg", "caption": "two police officers on horses in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030307.jpg", "caption": "a stop sign and two street signs on a pole", "annotations": [{"polygon": [[88, 146], [88, 180], [209, 175], [209, 138]], "text": "BETHEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BETHEL", "recog_valid": true, "glyph_recog_text": "BETHEL", "glyph_recog_ld": 1.0}, {"polygon": [[177, 219], [179, 255], [212, 257], [212, 225]], "text": "OLD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OLD", "recog_valid": true, "glyph_recog_text": "°", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161384.jpg", "caption": "a bike parked outside of a restaurant", "annotations": [{"polygon": [[344, 193], [346, 193], [470, 190], [477, 156], [347, 157], [347, 157]], "text": "SNOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SNOW", "recog_valid": true, "glyph_recog_text": "SNOW", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030314.jpg", "caption": "three sheep standing in a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423540.jpg", "caption": "a baseball player throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030334.jpg", "caption": "a young boy is standing on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030345.jpg", "caption": "a large airplane parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554636.jpg", "caption": "a double decker bus is parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030349.jpg", "caption": "a black and white photo of surfers riding on surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554642.jpg", "caption": "a blue bench sitting in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292498.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554658.jpg", "caption": "a kitchen with a refrigerator and a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292522.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554689.jpg", "caption": "a train is pulling into a station with people on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030408.jpg", "caption": "a street sign in the city of vienna", "annotations": [{"polygon": [[240, 159], [240, 172], [282, 188], [297, 190], [298, 180]], "text": "Ladefatigkeit", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ladetatilkein", "recog_valid": false, "glyph_recog_text": "Ladefarligkeif", "glyph_recog_ld": 0.6428573979590014}, {"polygon": [[215, 206], [210, 230], [267, 230], [271, 236], [279, 235], [279, 231], [278, 211], [247, 207]], "text": "Anfang", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Anfang", "recog_valid": true, "glyph_recog_text": "Anfang", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292558.jpg", "caption": "two computer monitors sitting on a desk with a mouse and keyboard", "annotations": [{"polygon": [[223, 206], [150, 216], [153, 241], [226, 231]], "text": "emade", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "made", "recog_valid": false, "glyph_recog_text": "emade", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[354, 176], [350, 201], [455, 211], [462, 186]], "text": "feelone ", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "feelone", "recog_valid": false, "glyph_recog_text": "feelone", "glyph_recog_ld": 1.0}, {"polygon": [[137, 178], [140, 214], [265, 202], [262, 174]], "text": "feelone", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "feelone", "recog_valid": true, "glyph_recog_text": "feelone", "glyph_recog_ld": 1.0}, {"polygon": [[102, 212], [105, 240], [153, 233], [148, 208]], "text": "like ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "like", "recog_valid": false, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030418.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554706.jpg", "caption": "a tray with fries, a hot dog and a soda", "annotations": [{"polygon": [[133, 158], [133, 158], [123, 185], [188, 192], [191, 162]], "text": "Me", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mel", "recog_valid": false, "glyph_recog_text": "Me", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[205, 223], [195, 244], [202, 251], [229, 243], [256, 240], [266, 247], [298, 268], [295, 251], [259, 226], [233, 221]], "text": "CH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CH", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292579.jpg", "caption": "a table with a donut, coffee and money", "annotations": [{"polygon": [[191, 366], [137, 387], [144, 402], [200, 384]], "text": "TOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TOU", "recog_valid": true, "glyph_recog_text": "TOU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554729.jpg", "caption": "a conveyor belt with luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554742.jpg", "caption": "a young boy holding a baseball bat on a baseball field", "annotations": [{"polygon": [[274, 240], [279, 245], [301, 238], [303, 230], [303, 220], [297, 215], [293, 215], [290, 218], [281, 218], [264, 220]], "text": "OR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORL", "recog_valid": false, "glyph_recog_text": "OR", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554743.jpg", "caption": "a narrow street with people walking down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554756.jpg", "caption": "a truck with a large pig on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554759.jpg", "caption": "a sandwich and a bowl of salad are on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423693.jpg", "caption": "a small train with a red and blue train on tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030484.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161578.jpg", "caption": "a traffic light on a city street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161599.jpg", "caption": "a cat is sleeping on a chair in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161610.jpg", "caption": "a woman posing under a street sign", "annotations": [{"polygon": [[153, 147], [155, 181], [335, 173], [333, 144]], "text": "Jonathan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jonathan", "recog_valid": true, "glyph_recog_text": "Jonathan", "glyph_recog_ld": 1.0}, {"polygon": [[359, 138], [355, 172], [403, 172], [404, 146]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AV", "recog_valid": true, "glyph_recog_text": "AV", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423761.jpg", "caption": "a table with a bunch of vegetables and fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554841.jpg", "caption": "a green pole with signs on it", "annotations": [{"polygon": [[277, 270], [290, 266], [294, 272], [350, 253], [351, 246], [358, 241], [359, 249], [436, 220], [426, 275], [418, 275], [420, 262], [277, 307]], "text": "Broadway", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Broadway", "recog_valid": true, "glyph_recog_text": "Broadway", "glyph_recog_ld": 1.0}, {"polygon": [[444, 249], [482, 238], [486, 245], [486, 258], [447, 270]], "text": "1100", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "1100", "recog_valid": true, "glyph_recog_text": "1100", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161648.jpg", "caption": "a vintage toy stove and oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030585.jpg", "caption": "a large orange truck parked next to a white truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554891.jpg", "caption": "a man sitting at a table outside a cafe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030608.jpg", "caption": "a man and a dog playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423832.jpg", "caption": "a woman eating a banana while wearing a shirt that says the long island life", "annotations": [{"polygon": [[196, 344], [222, 347], [256, 358], [252, 381], [230, 379], [218, 376], [203, 376], [193, 349]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[170, 374], [203, 377], [209, 384], [210, 414], [206, 414], [197, 400], [188, 401], [166, 397], [162, 392], [166, 380]], "text": "ONG", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ONG", "recog_valid": true, "glyph_recog_text": "ONG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554943.jpg", "caption": "a baseball game on tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423872.jpg", "caption": "a group of people working in a kitchen", "annotations": [{"polygon": [[381, 263], [381, 263], [372, 281], [405, 298], [416, 278], [416, 278]], "text": "BLEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SLEE", "recog_valid": false, "glyph_recog_text": "BLEE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161751.jpg", "caption": "a steam train traveling down the tracks near a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423906.jpg", "caption": "two boats are sitting on the shore", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000554982.jpg", "caption": "a living room with a piano, a couch, and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030699.jpg", "caption": "a young boy is playing with an umbrella in front of a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161782.jpg", "caption": "a parking meter with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030713.jpg", "caption": "a large blue and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161796.jpg", "caption": "a teddy bear is standing in front of the canada building", "annotations": [{"polygon": [[174, 180], [170, 219], [514, 211], [512, 164]], "text": "CANADA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANADA", "recog_valid": true, "glyph_recog_text": "CANADA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555015.jpg", "caption": "a bathroom with a shower, toilet and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292887.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000423966.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030775.jpg", "caption": "a blue and white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030779.jpg", "caption": "a man on a skateboard doing a trick on a set of stairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292923.jpg", "caption": "a group of people sitting at a table with their hands in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030787.jpg", "caption": "a jeep driving down a road with people standing on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555118.jpg", "caption": "a group of people walking down the street near a motorcycle shop", "annotations": [{"polygon": [[133, 237], [133, 258], [165, 267], [166, 254], [158, 246]], "text": "Paris", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Paris", "recog_valid": true, "glyph_recog_text": "Paris", "glyph_recog_ld": 1.0}, {"polygon": [[15, 131], [69, 157], [66, 172], [10, 148]], "text": "SON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SON", "recog_valid": true, "glyph_recog_text": "SON", "glyph_recog_ld": 1.0}, {"polygon": [[255, 250], [204, 230], [206, 218], [256, 241]], "text": "HARLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARLEY", "recog_valid": true, "glyph_recog_text": "HARLEY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161917.jpg", "caption": "a man in a suit and tie standing in front of a book shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161924.jpg", "caption": "a motorcycle is laying in the dirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000292998.jpg", "caption": "a cow in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161929.jpg", "caption": "a person playing a video game on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424068.jpg", "caption": "a living room with a kitchen and a ceiling fan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161935.jpg", "caption": "a basket of rambutan fruit is on display at a market", "annotations": [{"polygon": [[78, 167], [95, 156], [102, 150], [125, 138], [133, 129], [120, 120], [103, 129], [91, 140], [78, 150]], "text": "Forpical", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "p9pador", "recog_valid": false, "glyph_recog_text": "Forpical", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424091.jpg", "caption": "a city street with cars driving on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555170.jpg", "caption": "a man holding a surfboard in front of a black background", "annotations": [{"polygon": [[97, 352], [132, 320], [152, 343], [116, 376]], "text": "X", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "X", "recog_valid": true, "glyph_recog_text": "X", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030888.jpg", "caption": "a skateboarder is doing a trick in the air", "annotations": [{"polygon": [[23, 271], [33, 266], [55, 320], [48, 326]], "text": "SKULL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SKULL", "recog_valid": true, "glyph_recog_text": "shuLe", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161973.jpg", "caption": "four red and white airplanes flying in formation", "annotations": [{"polygon": [[161, 295], [168, 286], [172, 279], [178, 272], [184, 264], [194, 272], [185, 280], [181, 287], [176, 294], [172, 298], [170, 302]], "text": "AEROSHELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IEOSHELL", "recog_valid": false, "glyph_recog_text": "电703", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161974.jpg", "caption": "a flatbed truck with a flatbed trailer attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424124.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[49, 170], [45, 201], [63, 201], [72, 206], [74, 201], [118, 200], [110, 169]], "text": "AQUAFINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AUUAJINA", "recog_valid": false, "glyph_recog_text": "AQUAFINA", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[152, 169], [147, 200], [165, 200], [174, 208], [174, 201], [220, 201], [216, 170]], "text": "AQUAFINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AUUAHINA", "recog_valid": false, "glyph_recog_text": "AQUAFINA", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[376, 170], [369, 204], [390, 203], [398, 209], [407, 203], [448, 202], [443, 171]], "text": "AQUAFINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AUUANIN", "recog_valid": false, "glyph_recog_text": "AQUAFINA", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030910.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030911.jpg", "caption": "a wall with a bunch of clocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424132.jpg", "caption": "a group of women on rollerblades", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424137.jpg", "caption": "a man and woman eating hot dogs and french fries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000161996.jpg", "caption": "a man throwing a frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293070.jpg", "caption": "a man and a little girl eating cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293074.jpg", "caption": "a green bus driving down a street", "annotations": [{"polygon": [[379, 284], [378, 313], [410, 309], [411, 289], [404, 279]], "text": "Tindo", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Tindo", "recog_valid": true, "glyph_recog_text": "Tindo", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555227.jpg", "caption": "an old black and white photo of a man in a suit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000030944.jpg", "caption": "mouse pad with flower design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162020.jpg", "caption": "a street sign with a building in the background", "annotations": [{"polygon": [[169, 292], [257, 312], [257, 329], [212, 326], [169, 312], [165, 302]], "text": "Campbell", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Campbell", "recog_valid": true, "glyph_recog_text": "Campbelf", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555237.jpg", "caption": "man standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162046.jpg", "caption": "a man standing next to a truck with a microwave and other appliances", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424193.jpg", "caption": "two horses are standing next to a trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293142.jpg", "caption": "a young boy holding a doughnut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424218.jpg", "caption": "a photo of a fruit stand with apples and oranges", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555308.jpg", "caption": "a large airplane parked on a tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424254.jpg", "caption": "two men in cowboy hats riding horses in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424263.jpg", "caption": "a bird perched on a branch in front of mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555339.jpg", "caption": "a cat sits on a table in front of bookshelves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162163.jpg", "caption": "a man is holding a hot dog on a grill", "annotations": [{"polygon": [[113, 248], [230, 278], [238, 246], [139, 203]], "text": "Marllio", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "nanLun", "recog_valid": false, "glyph_recog_text": "Marllio", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[130, 355], [130, 355], [193, 360], [197, 276], [149, 258], [118, 350]], "text": "14", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "t l", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162177.jpg", "caption": "a baseball game with two players on the field", "annotations": [{"polygon": [[172, 178], [202, 169], [207, 201], [178, 205]], "text": "21", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424342.jpg", "caption": "a television set in a living room with a fireplace", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293271.jpg", "caption": "a stop sign on a street corner with a child standing next to it", "annotations": [{"polygon": [[232, 162], [310, 151], [312, 184], [235, 194]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293274.jpg", "caption": "a double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424348.jpg", "caption": "a close up of a propeller on an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555427.jpg", "caption": "a cat is laying on a bed with a bottle of soda", "annotations": [{"polygon": [[61, 318], [79, 374], [112, 374], [95, 309]], "text": "Snapp", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Snagy", "recog_valid": false, "glyph_recog_text": "Snapp", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[95, 337], [109, 375], [130, 373], [119, 335]], "text": "diet", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "dier", "recog_valid": false, "glyph_recog_text": "diet", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162223.jpg", "caption": "a red and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162250.jpg", "caption": "a group of horses standing in a field at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555473.jpg", "caption": "a group of people on paddle boards in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031187.jpg", "caption": "a man cutting a birthday cake with a group of children", "annotations": [{"polygon": [[248, 393], [281, 369], [296, 382], [264, 404]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "howy", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[258, 353], [221, 379], [230, 386], [274, 362]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Birmday", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424407.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424411.jpg", "caption": "a colorful wooden bench next to a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293340.jpg", "caption": "a toaster oven with a pan inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293341.jpg", "caption": "a refrigerator with a lot of magnets and pictures on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424429.jpg", "caption": "a double decker bus is driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555506.jpg", "caption": "a bus with a colorful design driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293362.jpg", "caption": "a croissant sandwich on a white plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293385.jpg", "caption": "a sandwich and a cup of coffee on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162318.jpg", "caption": "a toy train is on a bridge over a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424481.jpg", "caption": "a small red and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162346.jpg", "caption": "a man is putting something in a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031288.jpg", "caption": "a small fluffy dog sitting in the back seat of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555577.jpg", "caption": "a woman sitting on a bed with a cat on her lap", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293453.jpg", "caption": "a car with a lot of luggage in the back", "annotations": [{"polygon": [[394, 166], [401, 171], [395, 180], [387, 205], [377, 239], [369, 237], [380, 196], [386, 181], [390, 172]], "text": "birdrensplace.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "uaptaoopdsuauapo", "recog_valid": false, "glyph_recog_text": "bndnenepkacecom", "glyph_recog_ld": 0.12500054687465822}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162386.jpg", "caption": "a little girl sitting at a table with a plate of food", "annotations": [{"polygon": [[188, 372], [192, 365], [197, 350], [201, 329], [214, 348], [227, 346], [241, 344], [246, 347], [297, 340], [298, 331], [308, 331], [304, 369], [264, 377], [262, 387], [255, 393], [243, 394], [238, 387], [239, 370]], "text": "Angel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Arat", "recog_valid": false, "glyph_recog_text": "Angel", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031335.jpg", "caption": "four young men sitting on the ground with skateboards", "annotations": [{"polygon": [[304, 355], [377, 313], [512, 345], [513, 415]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SS", "recog_valid": false, "glyph_recog_text": "BUS", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162427.jpg", "caption": "a cat's paw on a laptop keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031367.jpg", "caption": "a giraffe in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031382.jpg", "caption": "grilled corn on the cob", "annotations": [{"polygon": [[11, 361], [10, 401], [112, 401], [117, 416], [122, 416], [118, 355]], "text": "cookiin", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Cooky", "recog_valid": false, "glyph_recog_text": "cookiin", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162462.jpg", "caption": "two horses standing in a field of tall grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031391.jpg", "caption": "two buses are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031393.jpg", "caption": "a man standing next to a large sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162488.jpg", "caption": "a kitchen with a microwave and a basket on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293576.jpg", "caption": "two men standing next to a pink elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555738.jpg", "caption": "a flock of sheep walking in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293605.jpg", "caption": "a woman is walking down the street with a bicycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031462.jpg", "caption": "a man sitting on a bench in front of a poster", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031497.jpg", "caption": "a green and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293647.jpg", "caption": "a police officer on a motorcycle", "annotations": [{"polygon": [[138, 214], [129, 241], [187, 249], [191, 220]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLICE", "recog_valid": true, "glyph_recog_text": "POLICE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424722.jpg", "caption": "a police boat with people on it in the water", "annotations": [{"polygon": [[87, 288], [87, 288], [115, 295], [150, 302], [207, 310], [211, 295], [190, 289], [105, 274], [98, 274], [89, 281]], "text": "Artemis", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Artemts", "recog_valid": false, "glyph_recog_text": "Artemis", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555805.jpg", "caption": "a clock hanging from the ceiling in a dark room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555808.jpg", "caption": "a small plane is landing on a beach near a hotel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162596.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424771.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293704.jpg", "caption": "a large clock with the london west hollywood sign on it", "annotations": [{"polygon": [[128, 339], [236, 344], [393, 342], [404, 392], [114, 390]], "text": "LONDON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LONOIN", "recog_valid": false, "glyph_recog_text": "LONDON", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031562.jpg", "caption": "two black wii remotes are held in a hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293713.jpg", "caption": "a black and white photo of a skateboarder in front of a graffiti wall", "annotations": [{"polygon": [[98, 161], [96, 199], [237, 216], [235, 176]], "text": "ARROLL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARROLL", "recog_valid": true, "glyph_recog_text": "ARROLL", "glyph_recog_ld": 1.0}, {"polygon": [[95, 210], [94, 272], [241, 280], [241, 223]], "text": "KAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KA", "recog_valid": false, "glyph_recog_text": "KAT", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[95, 280], [92, 367], [249, 367], [245, 287]], "text": "PARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARK", "recog_valid": true, "glyph_recog_text": "PARK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424797.jpg", "caption": "a row of motorcycles parked in a row", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162654.jpg", "caption": "a man sitting on a skateboard", "annotations": [{"polygon": [[141, 89], [156, 108], [173, 88], [158, 72]], "text": "112", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "卡", "recog_valid": false, "glyph_recog_text": "112", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031604.jpg", "caption": "a street with a sign that says one way", "annotations": [{"polygon": [[147, 403], [116, 447], [410, 448], [362, 399]], "text": "ONLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "/TNO", "recog_valid": false, "glyph_recog_text": "ONLY", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162700.jpg", "caption": "a woman sitting on top of a suitcase with a polka dot skirt", "annotations": [{"polygon": [[352, 474], [384, 456], [396, 475], [417, 461], [424, 475], [411, 487], [421, 505], [404, 504], [397, 500], [388, 504], [369, 513]], "text": "WNP", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "W", "recog_valid": false, "glyph_recog_text": "WNP", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162712.jpg", "caption": "a picture of a police officer on a motorcycle", "annotations": [{"polygon": [[141, 383], [141, 404], [324, 413], [325, 384]], "text": "Polishelikopter", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Polishelikopter", "recog_valid": true, "glyph_recog_text": "Polishelikopter", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031641.jpg", "caption": "a young boy holding a tennis ball and a racquet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162717.jpg", "caption": "a building with many umbrellas and signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031666.jpg", "caption": "a black steam engine train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031673.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555982.jpg", "caption": "a bench sitting in a park near a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424915.jpg", "caption": "a bicycle with a basket on it", "annotations": [{"polygon": [[399, 162], [462, 101], [461, 113], [401, 171]], "text": "STEVENS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "1717", "recog_valid": false, "glyph_recog_text": "ztaytue", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000555998.jpg", "caption": "a cell phone, a wallet, a watch, a necklace, and a cord", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556003.jpg", "caption": "a woman standing next to a monument in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293862.jpg", "caption": "a fedex airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162792.jpg", "caption": "two pictures of a train and a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293880.jpg", "caption": "a fighter jet sitting on top of a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162813.jpg", "caption": "a refrigerator with drinks and food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031752.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[362, 109], [427, 103], [423, 130], [358, 133]], "text": "Nikon", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Viion", "recog_valid": false, "glyph_recog_text": "Nikon", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424980.jpg", "caption": "a sign that says no parking on fire lane", "annotations": [{"polygon": [[223, 134], [223, 134], [223, 189], [239, 188], [240, 165], [260, 152], [270, 137], [272, 124], [268, 117], [262, 119], [249, 126], [242, 131]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[245, 219], [245, 219], [247, 237], [302, 207], [300, 188]], "text": "LANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LANE", "recog_valid": true, "glyph_recog_text": "LANE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000424989.jpg", "caption": "a baseball player sliding into a base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293931.jpg", "caption": "a traffic light hanging from a power line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293940.jpg", "caption": "a person riding a motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556089.jpg", "caption": "a group of construction workers working on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031813.jpg", "caption": "a man riding a bike with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031822.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[173, 279], [205, 271], [211, 321], [176, 323]], "text": "H", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "工", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293966.jpg", "caption": "a red truck parked on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293979.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000293980.jpg", "caption": "a fire pit and a bench in front of a mountain range", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425078.jpg", "caption": "a yellow school bus driving down a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031867.jpg", "caption": "a woman holding two skis in front of her", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556162.jpg", "caption": "two boys sitting on a bed with stuffed animals", "annotations": [{"polygon": [[317, 282], [320, 275], [327, 269], [336, 266], [348, 263], [362, 259], [374, 259], [379, 260], [377, 272], [367, 272], [353, 272], [338, 276], [329, 281], [323, 289]], "text": "ATHLETIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KTHL FTT", "recog_valid": false, "glyph_recog_text": "ATHLETHC", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[234, 376], [232, 421], [282, 422], [284, 376]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294029.jpg", "caption": "a man is washing a truck in a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294034.jpg", "caption": "a group of people paddling in a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031904.jpg", "caption": "a woman and a little girl standing next to each other", "annotations": [{"polygon": [[114, 324], [114, 348], [175, 334], [170, 307]], "text": "wild", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wild", "recog_valid": false, "glyph_recog_text": "wild", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[179, 302], [182, 332], [218, 319], [216, 297]], "text": "Cats", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cu", "recog_valid": false, "glyph_recog_text": "Cats", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031912.jpg", "caption": "a toilet and a trash can next to each other", "annotations": [{"polygon": [[185, 165], [234, 179], [227, 201], [179, 184]], "text": "IGILI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IGIL", "recog_valid": false, "glyph_recog_text": "IGILI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031915.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425131.jpg", "caption": "a red motorcycle parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425137.jpg", "caption": "a plate of tacos on a counter with a pan of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000162996.jpg", "caption": "a silver bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425141.jpg", "caption": "a little girl is opening a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294074.jpg", "caption": "a blue and white bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031938.jpg", "caption": "a cat sitting on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425158.jpg", "caption": "a street light is on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294102.jpg", "caption": "a plate with donuts and berries on it", "annotations": [{"polygon": [[340, 200], [341, 162], [429, 165], [432, 197]], "text": "Neil", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Nail", "recog_valid": false, "glyph_recog_text": "Neil", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163045.jpg", "caption": "two dogs laying on a red couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031979.jpg", "caption": "a bunch of carrots on a table", "annotations": [{"polygon": [[136, 490], [138, 467], [197, 486], [194, 514]], "text": "DUTCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Duken", "recog_valid": false, "glyph_recog_text": "DUTCH", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163055.jpg", "caption": "a street sign with two street names on it", "annotations": [{"polygon": [[107, 207], [107, 226], [200, 209], [200, 188]], "text": "FRANKLIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRANKLIN", "recog_valid": true, "glyph_recog_text": "FRANKLIN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000031994.jpg", "caption": "a piece of cake sitting on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163089.jpg", "caption": "a man and a child sleeping in a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163091.jpg", "caption": "a museum with several planes on display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294199.jpg", "caption": "a horse and rider jumping over an obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556356.jpg", "caption": "a bathroom with a sink and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294214.jpg", "caption": "a woman with a phone in her hand", "annotations": [{"polygon": [[380, 328], [423, 363], [407, 383], [363, 348]], "text": "htc", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "hTC", "recog_valid": false, "glyph_recog_text": "htc", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425289.jpg", "caption": "a long conveyor belt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556370.jpg", "caption": "a large airport terminal with people waiting for their luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294231.jpg", "caption": "two fishing boats on a beach with rocks and pebbles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163176.jpg", "caption": "a bunch of bananas on a shelf in a store", "annotations": [{"polygon": [[327, 41], [429, 38], [428, 89], [327, 89]], "text": "$5.98", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "5.98", "recog_valid": false, "glyph_recog_text": "$5.98", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032105.jpg", "caption": "a black suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163185.jpg", "caption": "a laptop computer sitting on a table next to a bottle of juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032116.jpg", "caption": "a british airways plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556409.jpg", "caption": "a motorcycle parked on the side of a road in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032134.jpg", "caption": "a woman in pink shirt and black skirt playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425359.jpg", "caption": "a desk with a computer and a lamp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032147.jpg", "caption": "a motorcycle parked in front of a small house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163225.jpg", "caption": "two men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163240.jpg", "caption": "a train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294330.jpg", "caption": "a desk with a computer, a chair, and a bookcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425415.jpg", "caption": "a man is standing on a traffic light pole", "annotations": [{"polygon": [[81, 344], [78, 426], [298, 470], [410, 469], [419, 428], [102, 346]], "text": "9867537537", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "986751 7597", "recog_valid": false, "glyph_recog_text": "9867537537", "glyph_recog_ld": 0.7272729752063862}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294360.jpg", "caption": "a snowboarder in the air", "annotations": [{"polygon": [[183, 196], [191, 207], [200, 214], [204, 217], [208, 222], [204, 227], [196, 221], [190, 216], [184, 209], [177, 200]], "text": "NOKIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOKIA", "recog_valid": true, "glyph_recog_text": "NO414", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163292.jpg", "caption": "a woman dressed in white and holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425439.jpg", "caption": "a black and white photo of a speed limit sign", "annotations": [{"polygon": [[306, 236], [305, 259], [406, 266], [408, 243]], "text": "SPEED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPEED", "recog_valid": true, "glyph_recog_text": "SPEED", "glyph_recog_ld": 1.0}, {"polygon": [[306, 307], [300, 363], [402, 371], [401, 313]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "35", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163302.jpg", "caption": "a yellow and blue train car with the word alaska on it", "annotations": [{"polygon": [[118, 193], [117, 172], [443, 99], [442, 132]], "text": "ALASKA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALAEKA", "recog_valid": false, "glyph_recog_text": "ALASKA", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556547.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425520.jpg", "caption": "a bathroom with a monkey shower curtain and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163394.jpg", "caption": "a cat sitting in a suitcase", "annotations": [{"polygon": [[427, 164], [427, 122], [480, 140], [480, 185]], "text": "Fred", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Fredl", "recog_valid": false, "glyph_recog_text": "Fred", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[425, 167], [430, 219], [510, 234], [509, 195]], "text": "Meyer", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Myer", "recog_valid": false, "glyph_recog_text": "Meye", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[480, 266], [486, 252], [513, 269], [508, 284]], "text": "today", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "todao", "recog_valid": false, "glyph_recog_text": "today", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294466.jpg", "caption": "a family of four sitting on a bench at the boardwalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556613.jpg", "caption": "a man flying a kite in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425548.jpg", "caption": "a group of planes on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163413.jpg", "caption": "a giraffe and a pig in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294491.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556636.jpg", "caption": "a woman sitting on a bench talking on a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556637.jpg", "caption": "a man eating a doughnut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294498.jpg", "caption": "a girl throwing a ball to a batter", "annotations": [{"polygon": [[255, 198], [278, 186], [286, 221], [265, 230]], "text": "14", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "14", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032403.jpg", "caption": "two pairs of shoes and a hat sit on a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556695.jpg", "caption": "a man holding a sign and a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294564.jpg", "caption": "a man walking with an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425652.jpg", "caption": "a large truck with a blue and red paint job", "annotations": [{"polygon": [[163, 124], [163, 124], [266, 105], [269, 122], [167, 140]], "text": "MALCOLM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MALCOLM", "recog_valid": true, "glyph_recog_text": "MALCOLM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032442.jpg", "caption": "a woman holding a slow sign in front of a crowd", "annotations": [{"polygon": [[36, 290], [286, 306], [286, 317], [261, 370], [34, 357]], "text": "SLOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SLOW", "recog_valid": true, "glyph_recog_text": "SLOW", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294587.jpg", "caption": "a space shuttle is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163525.jpg", "caption": "a sailboat is sailing in the water near a marina", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294603.jpg", "caption": "a man on skis standing in front of a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032458.jpg", "caption": "a view of a city from a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425685.jpg", "caption": "a flock of sheep in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032471.jpg", "caption": "a group of people standing around a parking lot with motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556771.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163579.jpg", "caption": "a white mouse on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556800.jpg", "caption": "a silver trailer with a cupcake on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163591.jpg", "caption": "a man standing in a store with a bunch of bags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032522.jpg", "caption": "two people standing in the snow holding a large bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556813.jpg", "caption": "a woman holding an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425746.jpg", "caption": "a river with boats parked on the side of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163602.jpg", "caption": "a traffic light with a doll on top of it", "annotations": [{"polygon": [[118, 269], [132, 265], [142, 283], [156, 294], [172, 288], [175, 281], [189, 284], [176, 296], [161, 305], [137, 301], [123, 290], [119, 271]], "text": "MANCHESTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "MNCHESTER", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556824.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556830.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556833.jpg", "caption": "a man kiteboarding in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425773.jpg", "caption": "a truck is parked in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294716.jpg", "caption": "a woman eating a piece of cake and drinking a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163645.jpg", "caption": "a motorcycle parked in front of a building", "annotations": [{"polygon": [[39, 52], [36, 86], [167, 90], [167, 58]], "text": "CHOICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "CHOICE", "recog_valid": true, "glyph_recog_text": "CHOICE", "glyph_recog_ld": 1.0}, {"polygon": [[266, 57], [250, 78], [245, 97], [361, 103], [372, 71]], "text": "Agency", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Aonoon", "recog_valid": false, "glyph_recog_text": "Agency", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425807.jpg", "caption": "a man is feeding a zebra in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556886.jpg", "caption": "a man and woman standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294744.jpg", "caption": "a desk with two monitors and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294747.jpg", "caption": "a fence with a sign", "annotations": [{"polygon": [[155, 106], [158, 152], [209, 143], [192, 100]], "text": "McDonald's", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "kOont1", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425822.jpg", "caption": "a man on a skateboard doing a trick", "annotations": [{"polygon": [[114, 212], [114, 247], [273, 246], [274, 215]], "text": "EMANVELE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EMANVELE", "recog_valid": true, "glyph_recog_text": "EMANVELE", "glyph_recog_ld": 1.0}, {"polygon": [[286, 214], [287, 248], [436, 246], [436, 215]], "text": "FILTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FILTO", "recog_valid": true, "glyph_recog_text": "FILTO", "glyph_recog_ld": 1.0}, {"polygon": [[447, 212], [448, 247], [511, 248], [510, 213]], "text": "DVC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DVC", "recog_valid": true, "glyph_recog_text": "DVC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032607.jpg", "caption": "a man in plaid shirt riding a horse on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294758.jpg", "caption": "a baseball game with a batter at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294761.jpg", "caption": "a clock is sitting on a desk with a bunch of papers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163692.jpg", "caption": "a computer monitor on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294787.jpg", "caption": "three men on a podium with skis in their hands", "annotations": [{"polygon": [[118, 0], [109, 158], [135, 161], [146, 102], [138, 0]], "text": "nocssiws", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "10D22iW2", "recog_valid": false, "glyph_recog_text": "coonn.", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163718.jpg", "caption": "a cat and a dog laying on the ground near a door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425862.jpg", "caption": "a woman reading a book", "annotations": [{"polygon": [[130, 214], [130, 216], [137, 229], [214, 195], [207, 180]], "text": "DOUGLAS ADAMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOUGLAS", "recog_valid": false, "glyph_recog_text": "DOUOLASADAMS", "glyph_recog_ld": 0.5000004166663194}, {"polygon": [[145, 226], [207, 201], [214, 216], [154, 240]], "text": "ADAMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ADAMS", "recog_valid": true, "glyph_recog_text": "ADAMS", "glyph_recog_ld": 1.0}, {"polygon": [[94, 232], [107, 226], [144, 292], [135, 304]], "text": "DOUGLAS ADAMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DOUGLAS", "recog_valid": false, "glyph_recog_text": "DOURLUAS ADAME", "glyph_recog_ld": 0.42857183673440236}, {"polygon": [[84, 248], [97, 242], [131, 298], [119, 305]], "text": "ADAMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ADAMS", "recog_valid": true, "glyph_recog_text": "ADAMS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425864.jpg", "caption": "a red and white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556954.jpg", "caption": "a man with glasses brushing his teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294813.jpg", "caption": "a motorcycle with a flower on the front wheel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294823.jpg", "caption": "a row of parking meters with blue signs on them", "annotations": [{"polygon": [[377, 31], [378, 98], [407, 99], [411, 99], [415, 99], [414, 35]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[392, 252], [399, 273], [412, 290], [418, 294], [425, 284], [414, 274], [404, 249]], "text": "RETAIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RETAIN", "recog_valid": true, "glyph_recog_text": "RETAIN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556969.jpg", "caption": "a mailbox and a sign on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000425899.jpg", "caption": "a computer desk with a monitor, keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294829.jpg", "caption": "1940 british royal enfield motorcycle, no number plate,", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163761.jpg", "caption": "a stop sign with a moon in the background", "annotations": [{"polygon": [[424, 213], [422, 256], [317, 268], [319, 224]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163764.jpg", "caption": "a man holding a box of donuts and a drink", "annotations": [{"polygon": [[233, 294], [272, 284], [274, 298], [236, 316]], "text": "SQUISHE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROUIS", "recog_valid": false, "glyph_recog_text": "SQUISHE", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000556983.jpg", "caption": "a cat sitting on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294849.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294866.jpg", "caption": "a group of people standing on a sidewalk in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032729.jpg", "caption": "a train is parked on the tracks", "annotations": [{"polygon": [[225, 238], [258, 241], [259, 272], [224, 267], [224, 238]], "text": "60089", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "60089", "recog_valid": true, "glyph_recog_text": "60089", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163803.jpg", "caption": "a teddy bear with a note on it", "annotations": [{"polygon": [[220, 302], [242, 327], [232, 335], [204, 308], [205, 304]], "text": "om", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CM", "recog_valid": false, "glyph_recog_text": "aM", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163812.jpg", "caption": "a black motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294892.jpg", "caption": "a bus driving down a street with a person on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557045.jpg", "caption": "a baseball player on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294914.jpg", "caption": "a piece of luggage with a tag attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032778.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032787.jpg", "caption": "a large airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032801.jpg", "caption": "a conveyor belt with doughnuts on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032809.jpg", "caption": "a bus driving down the road with people on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000294956.jpg", "caption": "cross country skiing in norway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557102.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426049.jpg", "caption": "a row of parking meters with signs on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000163917.jpg", "caption": "a vase with pink flowers and a bottle of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032845.jpg", "caption": "a group of children standing on a field with microphones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295007.jpg", "caption": "a blue and white train at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032868.jpg", "caption": "three women sitting on a couch playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295049.jpg", "caption": "fruit flavored envelopes with fruit on them", "annotations": [{"polygon": [[227, 182], [294, 184], [356, 191], [357, 209], [315, 216], [231, 208], [223, 196]], "text": "Envelopes", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Envelopes", "recog_valid": true, "glyph_recog_text": "Envelopes", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557200.jpg", "caption": "two airplanes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426128.jpg", "caption": "a group of people sitting on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557205.jpg", "caption": "a refrigerator with a full freezer and a full fridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164000.jpg", "caption": "a fishing boat with flags and a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295074.jpg", "caption": "a yellow school bus driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164013.jpg", "caption": "a boy on a skateboard doing a trick on a ramp", "annotations": [{"polygon": [[281, 312], [414, 351], [415, 377], [276, 338]], "text": "READ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REENOE", "recog_valid": false, "glyph_recog_text": "READ", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295103.jpg", "caption": "a woman petting a dog on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557254.jpg", "caption": "louis theroux - look at heart", "annotations": [{"polygon": [[283, 89], [364, -1], [384, 14], [302, 106]], "text": "LOOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOOK", "recog_valid": true, "glyph_recog_text": "LOOK", "glyph_recog_ld": 1.0}, {"polygon": [[374, 124], [457, 31], [437, 19], [355, 107]], "text": "HEAR?", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "HEAR", "recog_valid": false, "glyph_recog_text": "HEAR?", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[210, 72], [232, 46], [282, 0], [297, 13], [229, 88]], "text": "IOCC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "10cc", "recog_valid": false, "glyph_recog_text": "10cc", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164043.jpg", "caption": "a man and a child standing in front of an elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557263.jpg", "caption": "a street sign with two street names", "annotations": [{"polygon": [[300, 127], [301, 153], [354, 161], [356, 137]], "text": "500", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "500", "recog_valid": true, "glyph_recog_text": "500", "glyph_recog_ld": 1.0}, {"polygon": [[227, 166], [417, 192], [418, 225], [399, 231], [209, 204]], "text": "JACKSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JACKSON", "recog_valid": true, "glyph_recog_text": "JACKSON", "glyph_recog_ld": 1.0}, {"polygon": [[307, 249], [336, 233], [338, 245], [339, 260], [310, 276]], "text": "800", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "800", "recog_valid": true, "glyph_recog_text": "800", "glyph_recog_ld": 1.0}, {"polygon": [[248, 338], [299, 307], [404, 253], [401, 297], [253, 376]], "text": "MONTGOMERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MONTGOMERY", "recog_valid": true, "glyph_recog_text": "MONTGOMERY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032985.jpg", "caption": "a busy city street at night with many neon signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557272.jpg", "caption": "a group of buses parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000032986.jpg", "caption": "an elephant wearing a red and yellow blanket", "annotations": [{"polygon": [[214, 187], [215, 225], [259, 225], [264, 187], [236, 184]], "text": "ISLAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ISLEVD", "recog_valid": false, "glyph_recog_text": "ISLAND", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[270, 189], [266, 225], [312, 241], [318, 205], [289, 195]], "text": "SAARI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAARI", "recog_valid": true, "glyph_recog_text": "SAARI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164076.jpg", "caption": "a man walking down a street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295154.jpg", "caption": "a man sitting in a room with a computer and a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426229.jpg", "caption": "a jockey on a horse racing on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426232.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[130, 304], [105, 376], [155, 387], [190, 359], [207, 334], [201, 321]], "text": "18", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "18", "recog_valid": true, "glyph_recog_text": "18", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295162.jpg", "caption": "two elephants are performing in a circus ring", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164093.jpg", "caption": "a man playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295169.jpg", "caption": "a cat playing with a sock on the floor", "annotations": [{"polygon": [[191, 152], [190, 161], [252, 188], [254, 176]], "text": "Super saatcher ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Seeitaidirdner", "recog_valid": false, "glyph_recog_text": "Svpew soacher", "glyph_recog_ld": 0.21428627550980317}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164100.jpg", "caption": "a man and a woman standing around a table with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426252.jpg", "caption": "a train is pulling into a station with a person standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295185.jpg", "caption": "a white dump truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426261.jpg", "caption": "a stop sign with a picture of a dog on it", "annotations": [{"polygon": [[197, 225], [199, 264], [302, 255], [302, 220]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dOIS", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557335.jpg", "caption": "a street sign that says moomo on it", "annotations": [{"polygon": [[250, 320], [250, 351], [355, 352], [354, 320]], "text": "OTTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OT TA", "recog_valid": false, "glyph_recog_text": "OTTA", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295197.jpg", "caption": "a black bear looking out a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426269.jpg", "caption": "a man holding a bunch of bananas and a bunch of green leaves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557345.jpg", "caption": "a gas station with a large gas tank", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295201.jpg", "caption": "a suitcase with a book and shoes inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426282.jpg", "caption": "two trains are pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033068.jpg", "caption": "a street with cars driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164142.jpg", "caption": "a man holding two plates of food", "annotations": [{"polygon": [[378, 386], [376, 413], [431, 398], [429, 372]], "text": "ASP2", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ASP2", "recog_valid": true, "glyph_recog_text": "ASP2", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557387.jpg", "caption": "a black and white photo of a sign that says don't honk", "annotations": [{"polygon": [[200, 162], [316, 163], [308, 192], [200, 193]], "text": "DON'T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DON'T", "recog_valid": true, "glyph_recog_text": "DON'T", "glyph_recog_ld": 1.0}, {"polygon": [[201, 205], [314, 204], [317, 234], [201, 234]], "text": "HONK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HONK", "recog_valid": true, "glyph_recog_text": "HONK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164172.jpg", "caption": "two teddy bears dressed as police officers", "annotations": [{"polygon": [[378, 286], [371, 300], [297, 278], [303, 266]], "text": "LIBERTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIBERTY", "recog_valid": true, "glyph_recog_text": "LIBERTY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033105.jpg", "caption": "a person standing on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426355.jpg", "caption": "a collage of various electronic devices including a cell phone, camera, microwave and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295285.jpg", "caption": "a pink fire hydrant in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557434.jpg", "caption": "a man pulling a cart with a horse on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426390.jpg", "caption": "a group of people sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295325.jpg", "caption": "a person taking a bite out of a donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426400.jpg", "caption": "a man and a child in a stroller near an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295334.jpg", "caption": "a train on the tracks with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426408.jpg", "caption": "a group of men standing in front of a motorcycle shop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295343.jpg", "caption": "a man and a woman on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426421.jpg", "caption": "a woman laying on a bed with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033213.jpg", "caption": "a pedestrian crossing sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033215.jpg", "caption": "a black and white photo of a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426443.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[346, 169], [361, 196], [386, 186], [368, 158]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": true, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295376.jpg", "caption": "a double decker bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033251.jpg", "caption": "a group of people riding dirt bikes on a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557548.jpg", "caption": "a man taking a photo of children under a cherry tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426477.jpg", "caption": "pizza muslima with spinach and ricotta", "annotations": [{"polygon": [[197, 448], [188, 486], [289, 475], [283, 454]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "pizzu", "recog_valid": false, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295408.jpg", "caption": "a baby giraffe standing in a doorway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033272.jpg", "caption": "a man skiing down a steep slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295428.jpg", "caption": "a fruit stand with many different fruits and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164360.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557586.jpg", "caption": "a soccer player is sliding down the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295461.jpg", "caption": "a red and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033329.jpg", "caption": "a car is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426551.jpg", "caption": "a man and woman preparing food in a kitchen", "annotations": [{"polygon": [[164, 220], [169, 246], [246, 219], [233, 194]], "text": "ALASKA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALASA", "recog_valid": false, "glyph_recog_text": "ALASKA", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557627.jpg", "caption": "a man with a backpack on a subway train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557643.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295499.jpg", "caption": "a group of girls playing basketball in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295505.jpg", "caption": "a woman riding a motorcycle in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426585.jpg", "caption": "a black and white photo of people in an outdoor market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557664.jpg", "caption": "a street with many signs and buildings in the background", "annotations": [{"polygon": [[139, 142], [140, 178], [258, 182], [255, 145]], "text": "GARDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "玫瑰园", "recog_valid": false, "glyph_recog_text": "GARDEN", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426598.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033383.jpg", "caption": "a bench sits in the grass next to a pond", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164462.jpg", "caption": "two pictures of a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295540.jpg", "caption": "a police car with a stop sign on the front", "annotations": [{"polygon": [[176, 252], [173, 299], [311, 306], [320, 271], [320, 258], [177, 253]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557695.jpg", "caption": "a large gold clock on a pole in front of a building", "annotations": [{"polygon": [[223, 230], [236, 234], [237, 226], [239, 221], [244, 212], [253, 208], [248, 197], [238, 199], [230, 209]], "text": "FIFTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIFTH", "recog_valid": true, "glyph_recog_text": "FIFTH", "glyph_recog_ld": 1.0}, {"polygon": [[300, 275], [325, 294], [336, 281], [309, 261]], "text": "III", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "}f!", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[305, 237], [295, 239], [293, 231], [290, 225], [286, 219], [284, 217], [277, 212], [271, 208], [261, 207], [259, 195], [272, 196], [283, 202], [292, 211], [300, 221]], "text": "ANUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SSNUE", "recog_valid": false, "glyph_recog_text": "ANUE", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033408.jpg", "caption": "a person holding a flip phone", "annotations": [{"polygon": [[293, 164], [326, 186], [319, 195], [286, 173]], "text": "CYON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CYON", "recog_valid": true, "glyph_recog_text": "CYON", "glyph_recog_ld": 1.0}, {"polygon": [[168, 354], [203, 377], [197, 384], [164, 360]], "text": "mp3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "mP3", "recog_valid": false, "glyph_recog_text": "p", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[0, 177], [79, 195], [87, 246], [0, 225]], "text": "SK", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SK", "recog_valid": true, "glyph_recog_text": "S K", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295556.jpg", "caption": "a black and white sign", "annotations": [{"polygon": [[128, 112], [129, 153], [233, 157], [239, 123]], "text": "HURST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HURST", "recog_valid": true, "glyph_recog_text": "HURST", "glyph_recog_ld": 1.0}, {"polygon": [[254, 127], [254, 155], [331, 161], [330, 133]], "text": "GROVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GROVE", "recog_valid": true, "glyph_recog_text": "GROVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033429.jpg", "caption": "a female soccer player in orange uniform holding a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033431.jpg", "caption": "a group of people standing around a food truck", "annotations": [{"polygon": [[261, 152], [314, 116], [314, 86], [261, 133]], "text": "MARKSGRILL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "小新线上", "recog_valid": false, "glyph_recog_text": "MARKSGRILL", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295578.jpg", "caption": "a baseball player is swinging a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033442.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[221, 134], [194, 204], [241, 234], [235, 252], [243, 258], [249, 245], [287, 271], [311, 204]], "text": "Maple", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Maple", "recog_valid": true, "glyph_recog_text": "Maple", "glyph_recog_ld": 1.0}, {"polygon": [[352, 214], [333, 247], [371, 271], [376, 240]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ave", "recog_valid": false, "glyph_recog_text": "AVE", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[344, 268], [335, 290], [370, 298], [371, 284]], "text": "100", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "100", "recog_valid": true, "glyph_recog_text": "100", "glyph_recog_ld": 1.0}, {"polygon": [[458, 320], [436, 353], [500, 367], [505, 338]], "text": "Ave", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ave", "recog_valid": true, "glyph_recog_text": "Ave", "glyph_recog_ld": 1.0}, {"polygon": [[415, 361], [402, 395], [468, 410], [479, 372]], "text": "200", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "200", "recog_valid": true, "glyph_recog_text": "200", "glyph_recog_ld": 1.0}, {"polygon": [[100, 260], [89, 332], [358, 385], [370, 318]], "text": "Yosemite", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Yosemite", "recog_valid": true, "glyph_recog_text": "Yosemite", "glyph_recog_ld": 1.0}, {"polygon": [[5, 245], [50, 251], [19, 315], [-2, 314]], "text": "W.", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033444.jpg", "caption": "a man and a dog are playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426663.jpg", "caption": "a man dressed as a clown stands next to a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426664.jpg", "caption": "a jockey rides a grey horse at a race", "annotations": [{"polygon": [[181, 257], [181, 257], [192, 256], [204, 247], [206, 258], [189, 329], [176, 330], [177, 295], [178, 267]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164522.jpg", "caption": "a street sign with arrows pointing in different directions", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426680.jpg", "caption": "a jet plane sitting on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295613.jpg", "caption": "a baby sitting on the floor with a toy in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557771.jpg", "caption": "a motorcycle with a box on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164572.jpg", "caption": "a woman walking down the street with a bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033505.jpg", "caption": "a boy playing a video game on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295657.jpg", "caption": "a military truck with an american flag on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164600.jpg", "caption": "a school bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033529.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557824.jpg", "caption": "a sign that says stop before you leave", "annotations": [{"polygon": [[215, 121], [215, 162], [327, 164], [326, 121]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[137, 204], [138, 246], [200, 246], [200, 206]], "text": "Before", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Before", "recog_valid": true, "glyph_recog_text": "Before", "glyph_recog_ld": 1.0}, {"polygon": [[206, 214], [206, 260], [240, 247], [241, 217]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "多", "recog_valid": false, "glyph_recog_text": ">", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[248, 206], [246, 247], [299, 251], [298, 218]], "text": "leave,", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "leave", "recog_valid": false, "glyph_recog_text": "leave,", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[312, 207], [311, 250], [358, 250], [357, 220]], "text": "have", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "have", "recog_valid": true, "glyph_recog_text": "have", "glyph_recog_ld": 1.0}, {"polygon": [[362, 220], [363, 263], [399, 251], [397, 220]], "text": "you;", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "you", "recog_valid": false, "glyph_recog_text": "y", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295688.jpg", "caption": "wire to cubie with ground metal tape", "annotations": [{"polygon": [[228, 225], [234, 244], [279, 245], [281, 217], [274, 215], [263, 217], [263, 226], [250, 223]], "text": "wall", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "wall", "recog_valid": true, "glyph_recog_text": "wall", "glyph_recog_ld": 1.0}, {"polygon": [[36, 262], [37, 288], [135, 283], [135, 254]], "text": "ground", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ground", "recog_valid": true, "glyph_recog_text": "ground", "glyph_recog_ld": 1.0}, {"polygon": [[238, 386], [238, 415], [302, 416], [301, 386]], "text": "tape", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "tape", "recog_valid": true, "glyph_recog_text": "tape", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426762.jpg", "caption": "a black motorcycle parked on a brick road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164655.jpg", "caption": "a police car is parked next to a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033596.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426824.jpg", "caption": "a fire hydrant sitting in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426826.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426831.jpg", "caption": "a large room with a table and chairs", "annotations": [{"polygon": [[449, 210], [449, 242], [489, 243], [487, 208]], "text": "Erbi", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Erbi", "recog_valid": true, "glyph_recog_text": "Erbi", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164687.jpg", "caption": "a street sign on a bridge over a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295762.jpg", "caption": "a mirror on a side view mirror of a taxi", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426841.jpg", "caption": "a woman sleeping at a table in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426845.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557920.jpg", "caption": "a train on the tracks with a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426849.jpg", "caption": "a cat laying on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557922.jpg", "caption": "a person sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426853.jpg", "caption": "a man riding a motorcycle in a warehouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426857.jpg", "caption": "a white bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426863.jpg", "caption": "a black and white photo of a man walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557944.jpg", "caption": "a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295815.jpg", "caption": "a street sign with a face on it", "annotations": [{"polygon": [[90, 3], [194, 0], [406, 76], [408, 103]], "text": "CHARLOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CHARLOT", "recog_valid": true, "glyph_recog_text": "CHARLOT", "glyph_recog_ld": 1.0}, {"polygon": [[327, 108], [328, 120], [395, 144], [396, 130]], "text": "askipare", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "aeoiporb", "recog_valid": false, "glyph_recog_text": "sskipare", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033687.jpg", "caption": "a street with many signs and cars parked on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426903.jpg", "caption": "two women posing with a motorcycle at a car show", "annotations": [{"polygon": [[89, 130], [144, 131], [149, 160], [90, 158]], "text": "OUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OUR", "recog_valid": true, "glyph_recog_text": "OUR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295836.jpg", "caption": "two sandwiches and a beer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033698.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000557990.jpg", "caption": "a woman wearing a red tie standing in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295853.jpg", "caption": "a train traveling down the tracks near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164786.jpg", "caption": "a train with a large design on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033718.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164793.jpg", "caption": "a man eating a doughnut", "annotations": [{"polygon": [[55, 192], [51, 218], [111, 239], [116, 223]], "text": "Bazinga", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Bozingo", "recog_valid": false, "glyph_recog_text": "Bazinga", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[29, 80], [18, 121], [82, 140], [85, 99]], "text": "LC", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "LC", "recog_valid": true, "glyph_recog_text": "LC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558015.jpg", "caption": "a large airplane with a large cargo door", "annotations": [{"polygon": [[306, 94], [376, 107], [478, 142], [460, 202], [279, 157]], "text": "Lufthansa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lufthansa", "recog_valid": true, "glyph_recog_text": "Lufthansa", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426947.jpg", "caption": "a brown dog laying on the grass with a green frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033758.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426976.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[132, 365], [136, 466], [182, 457], [223, 448], [239, 381], [201, 370]], "text": "TN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "JN", "recog_valid": false, "glyph_recog_text": "TN", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[352, 439], [357, 460], [416, 450], [411, 429]], "text": "STUDIOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "SIUDIOS", "recog_valid": false, "glyph_recog_text": "STUDIOS", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[252, 402], [278, 442], [298, 431], [315, 432], [345, 427], [346, 420], [339, 391], [273, 391]], "text": "CTW", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CTW", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[250, 355], [265, 398], [341, 385], [341, 373], [316, 358]], "text": "KFIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RFiA", "recog_valid": false, "glyph_recog_text": "KFIA", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[58, 198], [56, 320], [333, 350], [364, 279], [358, 245], [337, 233]], "text": "TOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOO", "recog_valid": false, "glyph_recog_text": "TOP", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426975.jpg", "caption": "a baseball player holding a bat in front of a catcher and umpire", "annotations": [{"polygon": [[164, 259], [168, 285], [334, 292], [336, 260]], "text": "ANK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRANK", "recog_valid": false, "glyph_recog_text": "ANK", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000426980.jpg", "caption": "a black and white photo of people sitting in chairs and eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558055.jpg", "caption": "a bread in a toaster oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164842.jpg", "caption": "a skateboarder is riding a ramp at a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033773.jpg", "caption": "an older man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[106, 168], [127, 160], [151, 166], [152, 187], [105, 190]], "text": "prince", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rince", "recog_valid": false, "glyph_recog_text": "prince", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[165, 167], [182, 159], [213, 164], [215, 173], [213, 185], [165, 190]], "text": "prince", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "adice", "recog_valid": false, "glyph_recog_text": "prince", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[228, 191], [230, 166], [249, 157], [282, 165], [280, 184]], "text": "prince", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eriurne", "recog_valid": false, "glyph_recog_text": "prince", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[459, 151], [501, 136], [511, 140], [512, 151], [513, 190], [463, 198], [453, 197]], "text": "pri", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "pri", "recog_valid": true, "glyph_recog_text": "pri", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033786.jpg", "caption": "a street sign is covered in snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295940.jpg", "caption": "a woman holding a baby and holding a frisbee", "annotations": [{"polygon": [[335, 264], [398, 261], [401, 380], [340, 382]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "卜", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558092.jpg", "caption": "a plate with rice, meat and eggs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558113.jpg", "caption": "a tennis player in action on a clay court", "annotations": [{"polygon": [[85, 48], [183, 68], [173, 106], [85, 86]], "text": "Orange", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Drange", "recog_valid": false, "glyph_recog_text": "Orange", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[287, 83], [188, 65], [177, 101], [273, 120]], "text": "e sport", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "sport", "recog_valid": false, "glyph_recog_text": "e sport", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[333, 140], [353, 74], [427, 86], [427, 154]], "text": "AFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "AFI", "recog_valid": false, "glyph_recog_text": "AFF", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000295970.jpg", "caption": "a desk with a fruit on it", "annotations": [{"polygon": [[181, 199], [203, 185], [210, 183], [221, 185], [235, 192], [247, 191], [251, 212], [234, 213], [215, 206], [198, 204], [185, 208]], "text": "rotting", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cetting", "recog_valid": false, "glyph_recog_text": "rotting", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427051.jpg", "caption": "a red and yellow bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164909.jpg", "caption": "a red and white van", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427068.jpg", "caption": "a close up of a red cell phone", "annotations": [{"polygon": [[236, 351], [262, 350], [267, 324], [259, 319], [246, 320], [243, 325]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "Z", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[250, 393], [243, 427], [258, 429], [267, 427], [274, 416], [277, 391]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "n", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[399, 452], [401, 493], [418, 491], [424, 482], [434, 465], [430, 451]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "9", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 1.0}, {"polygon": [[245, 477], [239, 509], [273, 513], [279, 476]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "8", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 1.0}, {"polygon": [[89, 455], [82, 495], [91, 497], [117, 457]], "text": "7p", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[111, 375], [88, 399], [104, 412], [109, 412], [122, 399], [122, 377]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "又", "recog_valid": false, "glyph_recog_text": "4", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[391, 371], [387, 403], [399, 406], [415, 403], [421, 394], [412, 368]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "6", "recog_valid": true, "glyph_recog_text": "6", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427069.jpg", "caption": "a street sign in china", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296009.jpg", "caption": "a row of blue and yellow vases with plants in them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164944.jpg", "caption": "a large airplane parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427096.jpg", "caption": "a red fire hydrant in the grass near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000164955.jpg", "caption": "a man on a snowboard doing a trick on a ramp", "annotations": [{"polygon": [[132, 151], [132, 177], [126, 206], [115, 230], [120, 231], [133, 208], [147, 181], [151, 149]], "text": "KEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KEEIC.", "recog_valid": false, "glyph_recog_text": "33¥", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427103.jpg", "caption": "a parking meter with a solar panel on top", "annotations": [{"polygon": [[321, 212], [321, 212], [323, 231], [325, 268], [326, 269], [334, 269], [333, 249], [343, 249], [350, 241], [353, 229], [349, 214], [342, 210]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "Q", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[314, 330], [357, 324], [360, 355], [315, 359]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HERE", "recog_valid": true, "glyph_recog_text": "HERE", "glyph_recog_ld": 1.0}, {"polygon": [[320, 291], [320, 291], [321, 325], [351, 320], [352, 288]], "text": "PAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PAY", "recog_valid": true, "glyph_recog_text": "R", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033888.jpg", "caption": "a red stop sign with a sunset in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296033.jpg", "caption": "a man sitting on the snow", "annotations": [{"polygon": [[223, 417], [231, 412], [263, 436], [254, 442]], "text": "BURTOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "X01-08", "recog_valid": false, "glyph_recog_text": "BUPTOL", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033891.jpg", "caption": "a street sign and a traffic light", "annotations": [{"polygon": [[116, 186], [113, 204], [173, 231], [173, 216]], "text": "CROWN COLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROHMMIOOLN", "recog_valid": false, "glyph_recog_text": "CROWN COLA", "glyph_recog_ld": 0.45454595041277235}, {"polygon": [[85, 187], [85, 252], [125, 301], [162, 286], [170, 267], [172, 232]], "text": "RC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TO", "recog_valid": false, "glyph_recog_text": "RC", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[88, 337], [87, 357], [160, 376], [160, 356]], "text": "MARKET", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MARKET", "recog_valid": true, "glyph_recog_text": "MARKET", "glyph_recog_ld": 1.0}, {"polygon": [[86, 301], [85, 330], [161, 351], [161, 321]], "text": "JOHN'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "JOHNS", "recog_valid": false, "glyph_recog_text": "JOHN'S", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558190.jpg", "caption": "a blender with red liquid in it on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427123.jpg", "caption": "three men in military uniforms standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033918.jpg", "caption": "a dirty toilet with a disc in the toilet", "annotations": [{"polygon": [[238, 195], [257, 194], [271, 190], [280, 182], [285, 175], [297, 180], [287, 190], [279, 197], [265, 203], [250, 204], [236, 205], [236, 196]], "text": "COMPLAINCE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONPIIANCE", "recog_valid": false, "glyph_recog_text": "COMPLANCE", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558221.jpg", "caption": "a man is playing tennis", "annotations": [{"polygon": [[123, 24], [128, 41], [226, 42], [226, 22], [184, 10]], "text": "Thailand", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Thailand", "recog_valid": true, "glyph_recog_text": "Thailand", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427150.jpg", "caption": "a sign hanging above a counter in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000033946.jpg", "caption": "a bowl of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427169.jpg", "caption": "a man giving a lecture in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558269.jpg", "caption": "a street sign has texts", "annotations": [{"polygon": [[165, 238], [165, 238], [171, 274], [326, 276], [323, 241]], "text": "Vermilion", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Vermilion", "recog_valid": true, "glyph_recog_text": "Vermilion", "glyph_recog_ld": 1.0}, {"polygon": [[336, 242], [337, 277], [386, 278], [387, 243]], "text": "Est", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Est", "recog_valid": true, "glyph_recog_text": "Est", "glyph_recog_ld": 1.0}, {"polygon": [[96, 239], [94, 274], [152, 274], [154, 240]], "text": "Rue", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Rue", "recog_valid": true, "glyph_recog_text": "Rue", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558271.jpg", "caption": "a cake with strawberries and candles on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558274.jpg", "caption": "a group of people playing frisbee on a field", "annotations": [{"polygon": [[320, 164], [355, 163], [350, 196], [315, 197]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "15", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165062.jpg", "caption": "a cat laying on a desk next to a telephone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296137.jpg", "caption": "an air canada airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296153.jpg", "caption": "a woman sitting at a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427252.jpg", "caption": "la vieille ferme chardonnay", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034039.jpg", "caption": "a woman sitting on a couch with a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427283.jpg", "caption": "a purple train at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165147.jpg", "caption": "a man in blue shorts and a gray shirt is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558365.jpg", "caption": "a large ship in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558372.jpg", "caption": "a man taking a picture of a table on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165169.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034097.jpg", "caption": "two horses pulling a wagon with a man in it", "annotations": [{"polygon": [[48, 97], [48, 97], [48, 120], [193, 131], [195, 110]], "text": "NORTHERN", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "NORTHERN", "recog_valid": true, "glyph_recog_text": "NORTHERN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558401.jpg", "caption": "a table with a laptop, a tablet, and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296257.jpg", "caption": "a person standing next to a motorcycle with a bag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427335.jpg", "caption": "a table with a lot of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165200.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427344.jpg", "caption": "a cat laying on top of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296277.jpg", "caption": "a sea plane flying over the water with mountains in the background", "annotations": [{"polygon": [[101, 464], [101, 464], [98, 503], [98, 503], [118, 489], [163, 483], [166, 484], [165, 472], [120, 475], [118, 460]], "text": "Jason", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "大套", "recog_valid": false, "glyph_recog_text": "Jason", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[177, 459], [166, 489], [210, 489], [214, 469]], "text": "Low", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "1e", "recog_valid": false, "glyph_recog_text": "Low", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296289.jpg", "caption": "a group of people riding skateboards down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558444.jpg", "caption": "a group of people holding up their cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034169.jpg", "caption": "a train car on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427399.jpg", "caption": "a man walking past a clock tower in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558475.jpg", "caption": "a sign that says cyclists must stop", "annotations": [{"polygon": [[229, 65], [269, 65], [278, 108], [278, 114], [225, 114]], "text": "ALL-", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ALL", "recog_valid": false, "glyph_recog_text": "ALL-", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[286, 65], [347, 64], [342, 113], [288, 114]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[214, 174], [361, 174], [361, 205], [211, 204]], "text": "CYCLISTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CYCLISTS", "recog_valid": true, "glyph_recog_text": "CYCLISTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558508.jpg", "caption": "a street with cars parked on both sides of the road", "annotations": [{"polygon": [[108, 83], [108, 83], [134, 96], [165, 116], [174, 125], [174, 164], [109, 137]], "text": "THE ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "'THE", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034223.jpg", "caption": "a group of men standing around a table with a robot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296374.jpg", "caption": "a group of people standing in the snow", "annotations": [{"polygon": [[343, 129], [343, 147], [512, 135], [512, 114]], "text": "Schonbodenb", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Schonbodenb", "recog_valid": true, "glyph_recog_text": "Schonbodenb", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427449.jpg", "caption": "a man standing next to a motorcycle in a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296384.jpg", "caption": "a group of people standing in front of a screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165319.jpg", "caption": "a soccer player kicking the ball in front of other players", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427467.jpg", "caption": "a dog is sitting in the back of a blue truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558542.jpg", "caption": "a yellow fire truck with a ladder on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558556.jpg", "caption": "a plant with two umbrellas in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165350.jpg", "caption": "a group of cyclists racing down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296432.jpg", "caption": "a woman standing next to a train at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165373.jpg", "caption": "a kitchen counter with a stove and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427519.jpg", "caption": "a person holding a pen next to a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427526.jpg", "caption": "a small jet airplane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558615.jpg", "caption": "a group of people standing around a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427555.jpg", "caption": "two women sitting at a table with plates of food", "annotations": [{"polygon": [[86, 394], [259, 382], [268, 424], [87, 440]], "text": "Coca Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ESZ", "recog_valid": false, "glyph_recog_text": "Coca Cola", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165412.jpg", "caption": "a white and red bus on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034356.jpg", "caption": "a woman sitting in a chair with two dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427606.jpg", "caption": "a cat sitting on top of a pile of books", "annotations": [{"polygon": [[93, 374], [94, 391], [192, 363], [190, 347]], "text": "S+ARCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5+ARO", "recog_valid": false, "glyph_recog_text": "S+ARCK", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427618.jpg", "caption": "a large truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558697.jpg", "caption": "a large passenger jet sitting on the tarmac at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296560.jpg", "caption": "a giraffe with its head over a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165488.jpg", "caption": "a woman walks past a poster of the ruling party in kathmandu, nepal, on june 30, 2015", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034423.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427642.jpg", "caption": "a large passenger jet flying in the sky", "annotations": [{"polygon": [[405, 237], [425, 211], [424, 208], [427, 208], [434, 199], [438, 202], [435, 205], [436, 207], [434, 210], [432, 210], [412, 237], [413, 239], [410, 242], [410, 242], [407, 240]], "text": "germanwings", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "germainwinge", "recog_valid": false, "glyph_recog_text": "SarhAgleueiah", "glyph_recog_ld": 0.07692378698170232}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034430.jpg", "caption": "a baseball player is standing in front of a television screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296613.jpg", "caption": "a cake on a plate", "annotations": [{"polygon": [[189, 393], [192, 365], [295, 372], [292, 399]], "text": "JULY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JIUILY", "recog_valid": false, "glyph_recog_text": "JULY", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296614.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[239, 235], [229, 229], [208, 221], [193, 241], [196, 250], [204, 257], [215, 261]], "text": "GV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GV", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427688.jpg", "caption": "two men standing on a tennis court holding tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296635.jpg", "caption": "two hot dogs with toppings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558788.jpg", "caption": "a small plane on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558804.jpg", "caption": "two baseball players pose for a photo in front of a dugout", "annotations": [{"polygon": [[292, 219], [307, 209], [324, 200], [330, 205], [330, 212], [306, 233], [299, 236], [293, 228]], "text": "RED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AED", "recog_valid": false, "glyph_recog_text": "RED", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034528.jpg", "caption": "a collage of pictures showing different dishes and food", "annotations": [{"polygon": [[260, 113], [319, 114], [317, 91], [312, 92], [311, 81], [257, 87], [259, 114]], "text": "lunch", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "lunch", "recog_valid": true, "glyph_recog_text": "lunch", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296675.jpg", "caption": "a baseball player is standing at home plate", "annotations": [{"polygon": [[326, 192], [323, 225], [348, 224], [356, 193], [328, 191]], "text": "16", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "16", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165607.jpg", "caption": "a man and woman standing in front of a rug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558826.jpg", "caption": "a woman holding an umbrella and a dog on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558839.jpg", "caption": "a man in blue shirt and white shorts is playing tennis", "annotations": [{"polygon": [[0, 201], [1, 296], [41, 295], [40, 199]], "text": "I", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427770.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558851.jpg", "caption": "a man and a woman in a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427779.jpg", "caption": "a group of people playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427789.jpg", "caption": "a bike with flowers in a basket on a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427803.jpg", "caption": "two old airplanes parked on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165684.jpg", "caption": "a plate with a sandwich and a knife and fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558900.jpg", "caption": "two men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296758.jpg", "caption": "a bathroom sink with a toothbrush and toothpaste", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034629.jpg", "caption": "two horses are standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034632.jpg", "caption": "a man riding a motorcycle down a street", "annotations": [{"polygon": [[183, 252], [189, 246], [190, 244], [202, 239], [214, 239], [223, 241], [230, 265], [218, 263], [209, 265], [195, 271], [190, 253], [184, 256]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1", "recog_valid": false, "glyph_recog_text": "11", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165741.jpg", "caption": "a plate of fish with lemon wedges on it", "annotations": [{"polygon": [[161, 362], [161, 362], [266, 420], [285, 407], [168, 343], [162, 362]], "text": "UBE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "801", "recog_valid": false, "glyph_recog_text": "UBE", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427909.jpg", "caption": "a table with a bunch of doughnuts on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165766.jpg", "caption": "a yellow fire hydrant sitting on a brick sidewalk", "annotations": [{"polygon": [[198, 69], [196, 121], [423, 123], [425, 70]], "text": "decorum", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "decorum", "recog_valid": true, "glyph_recog_text": "decorum", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296844.jpg", "caption": "a laptop computer with a book open on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427921.jpg", "caption": "a white horse with a blue ribbon around its neck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296892.jpg", "caption": "a banana smoothie and a jar of peanut butter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165829.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296901.jpg", "caption": "a man riding a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165833.jpg", "caption": "a man on a tennis court", "annotations": [{"polygon": [[63, 138], [449, 145], [448, 207], [63, 199]], "text": "PARIBA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARIBA", "recog_valid": true, "glyph_recog_text": "PARIBA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034779.jpg", "caption": "a red and black motorcycle parked on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165855.jpg", "caption": "a large jetliner parked at the gate", "annotations": [{"polygon": [[0, 389], [-1, 398], [41, 374], [40, 368]], "text": "UNITED", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "INITED", "recog_valid": false, "glyph_recog_text": "10号·十民示", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559073.jpg", "caption": "a cat sitting on top of a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034785.jpg", "caption": "a hot dog and fries on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428006.jpg", "caption": "a sandwich and a drink on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428019.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[103, 343], [79, 359], [127, 403], [160, 392]], "text": "METS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "METS", "recog_valid": true, "glyph_recog_text": "METS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428029.jpg", "caption": "a train traveling over a bridge over a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034815.jpg", "caption": "a man and a woman standing next to a train", "annotations": [{"polygon": [[382, 407], [387, 412], [424, 377], [421, 372]], "text": "WATERFORD", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "h心zzylem", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428035.jpg", "caption": "a boy sitting on a bench reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034825.jpg", "caption": "a woman laying in bed reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034827.jpg", "caption": "a refrigerator with a full freezer and a full refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034828.jpg", "caption": "a motorcycle parked in a field with people and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428046.jpg", "caption": "a group of people playing frisbee on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559132.jpg", "caption": "a bride and groom walking down the street", "annotations": [{"polygon": [[281, 465], [285, 413], [392, 419], [395, 457]], "text": "Alan", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AE", "recog_valid": false, "glyph_recog_text": "Alan", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428065.jpg", "caption": "a man on a motorcycle with an american flag on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165921.jpg", "caption": "a bowl of vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000296995.jpg", "caption": "a person sitting on a couch with a tv on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428071.jpg", "caption": "a refrigerator with its door open and its shelves full of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559145.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297023.jpg", "caption": "a train is parked in the snow next to a tanker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165956.jpg", "caption": "a person sitting on a bed with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165977.jpg", "caption": "a blue and white motorcycle parked on the ground", "annotations": [{"polygon": [[212, 300], [208, 309], [274, 341], [277, 333]], "text": "SUZUKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SJZUK", "recog_valid": false, "glyph_recog_text": "suiwRi", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559194.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165990.jpg", "caption": "lufthansa a380-800 at the airport", "annotations": [{"polygon": [[220, 246], [314, 263], [304, 302], [212, 277]], "text": "Lufthansa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Luthansa", "recog_valid": false, "glyph_recog_text": "Lufthansa", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559214.jpg", "caption": "a street sign with a one way street sign on it", "annotations": [{"polygon": [[357, 171], [360, 204], [399, 198], [397, 166]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[78, 190], [92, 240], [324, 209], [326, 160]], "text": "WALNUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WALNUT", "recog_valid": true, "glyph_recog_text": "WALNUT", "glyph_recog_ld": 1.0}, {"polygon": [[287, 253], [299, 287], [379, 276], [386, 242]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[164, 269], [169, 302], [263, 290], [259, 259]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165999.jpg", "caption": "a large jetliner on the runway with a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428142.jpg", "caption": "a woman walking with a baby in a stroller and a sign that says still do not get up", "annotations": [{"polygon": [[242, 209], [240, 235], [282, 223], [282, 197]], "text": "JETZT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YTRT", "recog_valid": false, "glyph_recog_text": "JETZT", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[160, 225], [172, 257], [219, 256], [212, 222]], "text": "5772", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "民", "recog_valid": false, "glyph_recog_text": "5772", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559235.jpg", "caption": "a group of people holding red umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297092.jpg", "caption": "two baseball players walking on a field", "annotations": [{"polygon": [[139, 248], [156, 234], [179, 232], [200, 240], [210, 246], [215, 231], [195, 221], [172, 215], [153, 220], [138, 228]], "text": "INDIANAPOLLS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "neicon", "recog_valid": false, "glyph_recog_text": "NOUANPPOLS", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166043.jpg", "caption": "a bowl of food and a laptop on a table", "annotations": [{"polygon": [[269, 101], [296, 101], [323, 88], [328, 79], [330, 65], [307, 75], [281, 83], [271, 81]], "text": "GILT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "力儿", "recog_valid": false, "glyph_recog_text": "GILT", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[115, 375], [92, 398], [83, 391], [107, 368]], "text": "ThinkPad", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ThinkPa", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[102, 372], [62, 408], [47, 395], [88, 360]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "IEM", "recog_valid": false, "glyph_recog_text": "IBM", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166047.jpg", "caption": "a man swinging a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034985.jpg", "caption": "a flooded street with a sign and a yellow traffic cone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428212.jpg", "caption": "a train is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166069.jpg", "caption": "a man is playing frisbee in a field with people watching", "annotations": [{"polygon": [[113, 292], [122, 281], [146, 304], [136, 314]], "text": "BILL'A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BILL", "recog_valid": false, "glyph_recog_text": "8U", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[137, 315], [149, 306], [177, 348], [168, 355]], "text": "LABONG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ABON", "recog_valid": false, "glyph_recog_text": "LABONG", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559288.jpg", "caption": "a military vehicle parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166073.jpg", "caption": "a young boy is playing tennis on a court", "annotations": [{"polygon": [[248, 264], [319, 262], [319, 262], [319, 262], [320, 312], [322, 314], [322, 314], [307, 305]], "text": "tre", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AE", "recog_valid": false, "glyph_recog_text": "tre", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035002.jpg", "caption": "a person sitting under an umbrella on a pier", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297152.jpg", "caption": "a pile of cell phones", "annotations": [{"polygon": [[276, 46], [271, 120], [306, 120], [317, 44]], "text": "DDB", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "800", "recog_valid": false, "glyph_recog_text": "a0", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[126, 194], [136, 207], [196, 165], [189, 152]], "text": "FEARLESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FEARLES", "recog_valid": false, "glyph_recog_text": "FEARLESS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[143, 437], [132, 463], [214, 468], [216, 443]], "text": "change", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Change", "recog_valid": false, "glyph_recog_text": "change", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035008.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166086.jpg", "caption": "a small plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559306.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[39, 89], [201, 84], [199, 166], [39, 171]], "text": "754", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "754", "recog_valid": true, "glyph_recog_text": "754", "glyph_recog_ld": 1.0}, {"polygon": [[266, 225], [288, 203], [301, 222], [318, 254], [320, 262], [298, 274], [289, 253], [276, 236]], "text": "NTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NTS", "recog_valid": true, "glyph_recog_text": "NTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559331.jpg", "caption": "a skateboarder is doing a trick", "annotations": [{"polygon": [[76, 445], [174, 444], [175, 504], [76, 506]], "text": "IT'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "IT'S", "recog_valid": true, "glyph_recog_text": "IT'S", "glyph_recog_ld": 1.0}, {"polygon": [[265, 445], [264, 504], [192, 505], [192, 445]], "text": "ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ON", "recog_valid": true, "glyph_recog_text": "ON", "glyph_recog_ld": 1.0}, {"polygon": [[293, 445], [294, 502], [440, 504], [440, 445]], "text": "DUDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DUDE", "recog_valid": true, "glyph_recog_text": "DUDE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035045.jpg", "caption": "a person holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297191.jpg", "caption": "a fire hydrant is on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166126.jpg", "caption": "a view of a street from an apartment window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428288.jpg", "caption": "a man riding a snowboard on a rock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297227.jpg", "caption": "two police officers on bicycles", "annotations": [{"polygon": [[230, 113], [224, 121], [234, 122], [245, 127], [264, 137], [274, 147], [282, 139], [270, 130], [252, 121], [241, 116], [231, 111], [229, 113]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "POLE", "recog_valid": false, "glyph_recog_text": "POLICE", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428309.jpg", "caption": "a young girl sitting on the grass next to a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035094.jpg", "caption": "a desk with two computer monitors and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035102.jpg", "caption": "australia's first teddy bear to visit china", "annotations": [{"polygon": [[176, 155], [260, 138], [339, 128], [470, 116], [497, 158], [437, 163], [326, 169], [216, 182], [158, 196]], "text": "AUSTRALIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AUSTRALIA", "recog_valid": true, "glyph_recog_text": "AUSTRALIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035110.jpg", "caption": "a group of men in military uniforms riding horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428326.jpg", "caption": "a bus with a large advertisement on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297257.jpg", "caption": "a living room with a couch, table and chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297258.jpg", "caption": "a large white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035127.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166200.jpg", "caption": "a stuffed bear sitting in a high chair with a bottle of honey", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035132.jpg", "caption": "a man and a woman sitting on motorcycles at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559438.jpg", "caption": "a japan airways airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166254.jpg", "caption": "a man preparing food at a food stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428407.jpg", "caption": "a green apple", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559484.jpg", "caption": "a red stop sign in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035211.jpg", "caption": "a girl sitting on a chair holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166291.jpg", "caption": "a river boat with people on it traveling down a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297363.jpg", "caption": "a group of people sitting at a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428460.jpg", "caption": "an airplane with orange, blue and white stripes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559553.jpg", "caption": "two pictures of a girl kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297409.jpg", "caption": "an elephant is standing in a field with a crowd watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297417.jpg", "caption": "a tennis player is playing on a court", "annotations": [{"polygon": [[187, 170], [185, 239], [453, 247], [454, 183]], "text": "Gillette", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gillette", "recog_valid": true, "glyph_recog_text": "Gillette", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166356.jpg", "caption": "a stop sign with a rainbow in the sky", "annotations": [{"polygon": [[312, 259], [312, 217], [399, 207], [400, 248]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166386.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428565.jpg", "caption": "a piece of cake sitting on a plate with a soda", "annotations": [{"polygon": [[242, 118], [253, 121], [261, 121], [277, 123], [281, 120], [292, 116], [287, 140], [292, 139], [293, 146], [276, 151], [268, 153], [260, 152], [255, 150], [242, 148]], "text": "FIZZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIZZ", "recog_valid": true, "glyph_recog_text": "FIZZ", "glyph_recog_ld": 1.0}, {"polygon": [[202, 265], [202, 265], [207, 271], [211, 269], [220, 271], [223, 271], [258, 252], [259, 248], [259, 243], [254, 242], [246, 240], [225, 252], [213, 258], [209, 256], [206, 259], [201, 262], [201, 264]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Na", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[217, 274], [217, 274], [228, 269], [241, 263], [246, 260], [262, 251], [284, 245], [289, 246], [293, 252], [294, 261], [292, 264], [287, 268], [284, 266], [277, 257], [229, 287], [219, 292], [215, 288], [215, 284], [214, 280], [213, 278]], "text": "birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CNNG", "recog_valid": false, "glyph_recog_text": "birthday", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559645.jpg", "caption": "a person sitting on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559647.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297509.jpg", "caption": "a kitchen with a man standing in the middle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428585.jpg", "caption": "a body of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297517.jpg", "caption": "a train with smoke coming out of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559657.jpg", "caption": "a bench in a field with a view of a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166467.jpg", "caption": "a city square with a clock tower in the background", "annotations": [{"polygon": [[98, 398], [98, 375], [78, 376], [76, 368], [27, 371], [14, 399], [14, 400]], "text": "AMZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "AMz", "recog_valid": false, "glyph_recog_text": "AMZ", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035407.jpg", "caption": "a large sandwich and a drink on a tray", "annotations": [{"polygon": [[50, 127], [57, 142], [99, 134], [127, 124], [142, 114], [142, 97], [122, 108], [100, 117]], "text": "DUNKIN'", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "DUNKIN.", "recog_valid": false, "glyph_recog_text": "DUNKIN'", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[58, 143], [63, 157], [91, 155], [114, 147], [133, 141], [144, 132], [143, 115], [127, 125], [89, 138]], "text": "Dunkin' Donuts", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DONUTS", "recog_valid": false, "glyph_recog_text": "Dunkin' Dronuts", "glyph_recog_ld": 0.06666728888847406}, {"polygon": [[125, 276], [142, 281], [158, 244], [142, 243]], "text": "Chili", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Chil!", "recog_valid": false, "glyph_recog_text": "Chili", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166481.jpg", "caption": "a man in black shoes and white shirt playing tennis", "annotations": [{"polygon": [[0, 334], [0, 334], [1, 363], [12, 363], [33, 363], [57, 362], [82, 362], [95, 360], [108, 361], [111, 332], [103, 332], [88, 332], [72, 332], [54, 332], [31, 333]], "text": "TIZEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TIZEN", "recog_valid": true, "glyph_recog_text": "TIZEN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297559.jpg", "caption": "a wii with a remote and a controller", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297564.jpg", "caption": "a double decker bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035424.jpg", "caption": "a train on the tracks with people standing near it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166503.jpg", "caption": "a yellow double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559720.jpg", "caption": "three people standing in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297585.jpg", "caption": "a bus driving down a street with a car behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428663.jpg", "caption": "a woman flying a kite with a face on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559747.jpg", "caption": "united airlines boeing 747-400", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559755.jpg", "caption": "a stop sign on a street corner with a car driving by", "annotations": [{"polygon": [[280, 134], [280, 97], [380, 97], [382, 135]], "text": "ALTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ALTO", "recog_valid": true, "glyph_recog_text": "ALTO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035475.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[147, 389], [199, 351], [200, 335], [173, 356], [173, 351], [167, 353], [167, 359], [156, 370], [152, 364], [145, 389]], "text": "Augsburger", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hugsburger", "recog_valid": false, "glyph_recog_text": "Augsbuiger", "glyph_recog_ld": 0.8000001999998}, {"polygon": [[122, 197], [130, 203], [132, 201], [207, 292], [209, 275], [200, 265], [200, 257], [132, 171]], "text": "Joachimstaler", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Joachimstaler", "recog_valid": true, "glyph_recog_text": "Joachimstaler", "glyph_recog_ld": 1.0}, {"polygon": [[238, 332], [236, 327], [230, 329], [230, 322], [209, 295], [212, 274], [220, 283], [218, 289], [220, 288], [222, 294], [229, 301], [232, 298], [235, 305], [239, 315]], "text": "Stralze", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sraar", "recog_valid": false, "glyph_recog_text": "Stralze", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297622.jpg", "caption": "a woman walking on a tennis court at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297632.jpg", "caption": "a person holding a blackberry phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166585.jpg", "caption": "two dogs are standing in a canoe on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428740.jpg", "caption": "a green train at a train station with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166596.jpg", "caption": "a blue sign on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297684.jpg", "caption": "a horse is standing on its hind legs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559836.jpg", "caption": "a woman holding a slice of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297695.jpg", "caption": "a woman with a child in a stroller", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035552.jpg", "caption": "a young boy swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559840.jpg", "caption": "a young girl sitting at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297704.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166637.jpg", "caption": "a red car parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559857.jpg", "caption": "a large passenger jet flying through a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559877.jpg", "caption": "a burger and fries on a table", "annotations": [{"polygon": [[272, 255], [278, 250], [292, 247], [307, 247], [324, 246], [335, 248], [344, 250], [341, 285], [327, 281], [318, 281], [303, 281], [287, 281], [279, 284]], "text": "KETCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ETCH", "recog_valid": false, "glyph_recog_text": "KETCH", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297738.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428817.jpg", "caption": "a stop sign with a solar panel on it", "annotations": [{"polygon": [[136, 168], [136, 168], [354, 160], [358, 253], [131, 257]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166678.jpg", "caption": "a table with a cake and a box of chocolates on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166692.jpg", "caption": "a young girl signing a wedding cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559908.jpg", "caption": "a woman playing tennis on a tennis court", "annotations": [{"polygon": [[265, 120], [263, 148], [446, 141], [446, 113]], "text": "Fotovoltaica", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fotovoltaica", "recog_valid": true, "glyph_recog_text": "Fotovoltaica", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559912.jpg", "caption": "a brown bear is standing in front of a rock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297770.jpg", "caption": "a street sign on a brick sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035630.jpg", "caption": "a train car with graffiti on it", "annotations": [{"polygon": [[119, 414], [119, 427], [113, 434], [115, 439], [143, 442], [152, 442], [173, 442], [197, 441], [223, 440], [219, 427], [215, 419], [203, 415], [204, 403], [191, 397], [170, 400], [149, 402]], "text": "LIBER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OTR", "recog_valid": false, "glyph_recog_text": "LIBER", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[338, 406], [339, 438], [370, 438], [406, 436], [425, 435], [417, 417], [415, 404], [405, 400], [376, 401], [342, 401]], "text": "MUSCR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MUSER", "recog_valid": false, "glyph_recog_text": "MUSCR", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297802.jpg", "caption": "a man on skis is descending a steep slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035670.jpg", "caption": "a man and a child on skis on a snow covered slope", "annotations": [{"polygon": [[74, 87], [102, 124], [103, 126], [113, 115], [109, 102], [88, 86]], "text": "eek", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "eek", "recog_valid": true, "glyph_recog_text": "eek", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559958.jpg", "caption": "a pair of scissors and a red string", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166764.jpg", "caption": "two young boys sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297843.jpg", "caption": "a large jetliner flying in the blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297844.jpg", "caption": "two women playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559995.jpg", "caption": "a one way sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560004.jpg", "caption": "a teddy bear sitting on a wooden step", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560007.jpg", "caption": "a man in a red shirt is standing in a small room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166794.jpg", "caption": "a red and black train engine with a sign on it", "annotations": [{"polygon": [[171, 268], [174, 242], [242, 255], [240, 288]], "text": "DOLGOCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOLGOCH", "recog_valid": true, "glyph_recog_text": "DOL GOCH", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[335, 265], [335, 272], [341, 272], [348, 273], [353, 274], [358, 277], [363, 280], [367, 284], [372, 290], [375, 299], [380, 299], [379, 291], [378, 287], [375, 281], [370, 276], [362, 271], [353, 267], [343, 266]], "text": "TENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "TENT", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166798.jpg", "caption": "a large passenger jet flying in the blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166797.jpg", "caption": "a collage of pictures showing different foods", "annotations": [{"polygon": [[132, 116], [138, 112], [148, 114], [154, 119], [201, 119], [251, 127], [245, 146], [239, 146], [133, 141]], "text": "Strawberry", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Btrawberry", "recog_valid": false, "glyph_recog_text": "Strawberry", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[307, 148], [313, 144], [322, 147], [328, 158], [368, 158], [371, 165], [364, 179], [326, 179], [308, 170]], "text": "Syrup", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "byrup", "recog_valid": false, "glyph_recog_text": "Syrup", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[132, 449], [132, 449], [142, 449], [157, 461], [179, 461], [184, 454], [212, 463], [214, 474], [162, 481], [132, 473]], "text": "Pancales", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Pancakes", "recog_valid": false, "glyph_recog_text": "Pancales", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297872.jpg", "caption": "the chicago bull is on display in chicago", "annotations": [{"polygon": [[327, 57], [330, 81], [401, 91], [400, 72]], "text": "Chicago", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Chicago", "recog_valid": true, "glyph_recog_text": "Chicago", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297877.jpg", "caption": "a woman holding a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035741.jpg", "caption": "people shopping at a grocery store", "annotations": [{"polygon": [[169, 120], [163, 131], [171, 137], [181, 146], [189, 152], [191, 158], [197, 152], [190, 142], [178, 126], [169, 121]], "text": "Limes", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Limes", "recog_valid": true, "glyph_recog_text": "Ltrers", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297895.jpg", "caption": "a woman in yellow pants and pink jacket skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166828.jpg", "caption": "a refrigerator with a picture of a refrigerator and a picture of a refrigerator", "annotations": [{"polygon": [[290, 39], [320, 65], [333, 69], [338, 61], [329, 45], [322, 37], [322, 31], [311, 25], [294, 26]], "text": "this", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "+is", "recog_valid": false, "glyph_recog_text": "this", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[328, 86], [334, 92], [347, 84], [376, 110], [392, 107], [395, 94], [351, 68]], "text": "just", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "jusf", "recog_valid": false, "glyph_recog_text": "just", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[80, 366], [82, 375], [156, 354], [140, 334], [136, 334], [135, 349], [91, 359]], "text": "much", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "mvel", "recog_valid": false, "glyph_recog_text": "much", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[163, 329], [167, 351], [241, 331], [251, 318], [216, 314]], "text": "better", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "befHer", "recog_valid": false, "glyph_recog_text": "batter", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560055.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428991.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[185, 199], [241, 225], [245, 213], [188, 186]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROADWN", "recog_valid": false, "glyph_recog_text": "BROADNAY", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297921.jpg", "caption": "a traffic light on a street corner with a building in the background", "annotations": [{"polygon": [[170, 341], [172, 382], [210, 381], [209, 340]], "text": "151", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "151", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166853.jpg", "caption": "a laptop on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166869.jpg", "caption": "a green and yellow fire hydrant", "annotations": [{"polygon": [[186, 240], [192, 234], [225, 270], [221, 274]], "text": "MUNSTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MUNSTER", "recog_valid": true, "glyph_recog_text": "品明", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297964.jpg", "caption": "a woman in a red shirt and blue pants is standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429038.jpg", "caption": "three men playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297969.jpg", "caption": "a person is kite surfing in the ocean", "annotations": [{"polygon": [[403, 151], [413, 130], [427, 110], [437, 101], [446, 105], [438, 113], [428, 128], [418, 146], [411, 163]], "text": "Best", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "aest", "recog_valid": false, "glyph_recog_text": "Best", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560123.jpg", "caption": "an old man and a young man standing next to a vintage car", "annotations": [{"polygon": [[237, 347], [240, 373], [294, 379], [293, 352]], "text": "7181", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7181", "recog_valid": true, "glyph_recog_text": "7181", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297979.jpg", "caption": "a man looking at a cat standing next to a water bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297981.jpg", "caption": "a group of men sitting on a porch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297984.jpg", "caption": "a pizza with four different toppings in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166915.jpg", "caption": "a white refrigerator with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035844.jpg", "caption": "a bus and a car are driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560137.jpg", "caption": "a woman walking down the sidewalk with a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166932.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298004.jpg", "caption": "a steam train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560152.jpg", "caption": "a backpack with a cell phone, books, and other items", "annotations": [{"polygon": [[120, 308], [128, 325], [134, 321], [147, 314], [164, 309], [178, 306], [190, 304], [192, 298], [190, 286], [174, 289], [160, 291], [141, 296]], "text": "Pieces", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pieces", "recog_valid": true, "glyph_recog_text": "Pieces", "glyph_recog_ld": 1.0}, {"polygon": [[118, 291], [123, 305], [136, 300], [147, 293], [162, 288], [170, 288], [189, 285], [188, 275], [160, 277], [131, 283]], "text": "Licorice", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Licosnea", "recog_valid": false, "glyph_recog_text": "Licarice", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560158.jpg", "caption": "a kitchen with a sink, stove and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560172.jpg", "caption": "a large display case with many different types of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166959.jpg", "caption": "a man standing at a table with a woman and a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560202.jpg", "caption": "a person skiing down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429158.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167027.jpg", "caption": "a restaurant with umbrellas and tables outside", "annotations": [{"polygon": [[15, 155], [15, 155], [35, 155], [69, 161], [81, 163], [96, 167], [124, 176], [125, 184], [125, 186], [125, 187], [88, 181], [48, 174], [19, 168]], "text": "THAILAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "T HAILAND", "recog_valid": false, "glyph_recog_text": "THAILAND", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167033.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035964.jpg", "caption": "three women are preparing food for a table", "annotations": [{"polygon": [[262, 117], [261, 149], [335, 149], [338, 120]], "text": "EARLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EARLY", "recog_valid": true, "glyph_recog_text": "EARLY", "glyph_recog_ld": 1.0}, {"polygon": [[343, 120], [342, 152], [401, 152], [400, 124]], "text": "BIRD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRD", "recog_valid": true, "glyph_recog_text": "BIRD", "glyph_recog_ld": 1.0}, {"polygon": [[224, 134], [223, 174], [255, 175], [256, 135], [243, 132]], "text": "$", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P2", "recog_valid": false, "glyph_recog_text": "的", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429184.jpg", "caption": "a white ox with colorful horns standing in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035972.jpg", "caption": "a red fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560262.jpg", "caption": "a bed with a blanket and shoes on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035985.jpg", "caption": "a black and white photo of a car covered in snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035995.jpg", "caption": "a man in uniform shaking hands with another man", "annotations": [{"polygon": [[287, 282], [284, 256], [334, 241], [336, 265]], "text": "AFG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AFC", "recog_valid": false, "glyph_recog_text": "AFG", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[-1, 342], [41, 330], [50, 373], [-1, 392]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "T", "recog_valid": true, "glyph_recog_text": "T", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298147.jpg", "caption": "two men are standing next to a garbage truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429226.jpg", "caption": "a man in blue shirt and tan shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298154.jpg", "caption": "a yellow and blue train sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167083.jpg", "caption": "the old town house, the old town house, the old town house, the old town house, the old town house, the old town house, the old", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298160.jpg", "caption": "a man dressed in a traditional outfit is walking a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429242.jpg", "caption": "a woman and a child are feeding a giraffe", "annotations": [{"polygon": [[142, 417], [137, 448], [179, 449], [182, 423]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "7", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036029.jpg", "caption": "a car that has been hit by a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429251.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429256.jpg", "caption": "people flying kites on a beach with a few people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167118.jpg", "caption": "a mirror reflecting a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036057.jpg", "caption": "a person riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560347.jpg", "caption": "a black and white photo of people walking on a train platform", "annotations": [{"polygon": [[53, 201], [108, 212], [104, 244], [50, 236]], "text": "renfe", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "renfe", "recog_valid": true, "glyph_recog_text": "renfe", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167133.jpg", "caption": "a train engine sitting on the tracks", "annotations": [{"polygon": [[193, 336], [243, 336], [243, 370], [192, 367]], "text": "980", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "980", "recog_valid": true, "glyph_recog_text": "980", "glyph_recog_ld": 1.0}, {"polygon": [[279, 338], [328, 339], [328, 370], [276, 369], [277, 339]], "text": "006", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "006", "recog_valid": true, "glyph_recog_text": "006", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560366.jpg", "caption": "a blue and yellow train on the tracks", "annotations": [{"polygon": [[162, 207], [162, 234], [221, 232], [222, 204]], "text": "500", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5b0", "recog_valid": false, "glyph_recog_text": "500", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429305.jpg", "caption": "a clock on a sign that says the optical center", "annotations": [{"polygon": [[187, 134], [200, 131], [200, 136], [203, 136], [204, 130], [253, 115], [254, 97], [249, 98], [248, 105], [224, 111], [224, 108], [214, 109], [212, 114], [198, 119], [191, 116], [187, 120], [184, 125], [184, 130], [185, 133]], "text": "Optical", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Optical", "recog_valid": true, "glyph_recog_text": "Optica!", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[214, 132], [251, 122], [277, 117], [273, 131], [212, 150], [207, 144], [207, 138]], "text": "Center", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Center", "recog_valid": true, "glyph_recog_text": "Center", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036091.jpg", "caption": "a kitchen with wooden floors and a center island", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036098.jpg", "caption": "a man holding a baseball bat", "annotations": [{"polygon": [[428, 109], [428, 197], [458, 198], [457, 111]], "text": "HO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SOH", "recog_valid": false, "glyph_recog_text": "工O", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[332, 150], [329, 172], [429, 139], [430, 122]], "text": "TRITON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IRIION", "recog_valid": false, "glyph_recog_text": "TRITON", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429318.jpg", "caption": "a large pile of old suitcases and other items", "annotations": [{"polygon": [[469, 63], [469, 63], [458, 86], [470, 99], [488, 98], [494, 78], [494, 64]], "text": "a", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "c", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429319.jpg", "caption": "a man standing in front of a red airplane", "annotations": [{"polygon": [[224, 210], [224, 247], [320, 245], [320, 197]], "text": "KAFROPORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHLPLHT", "recog_valid": false, "glyph_recog_text": "KAFROPORT", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036110.jpg", "caption": "a neon clock with the words brainwash now", "annotations": [{"polygon": [[223, 128], [257, 122], [263, 156], [229, 163]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[347, 219], [355, 257], [377, 251], [372, 213]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "00", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[375, 310], [369, 274], [340, 279], [344, 315]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 1.0}, {"polygon": [[172, 392], [158, 483], [224, 495], [316, 493], [395, 447], [448, 407], [400, 332], [341, 386], [311, 394]], "text": "NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "NOV", "recog_valid": false, "glyph_recog_text": "NOW", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[310, 327], [313, 364], [344, 351], [339, 323]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": true, "glyph_recog_text": "5", "glyph_recog_ld": 1.0}, {"polygon": [[258, 348], [266, 378], [291, 376], [286, 342]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": true, "glyph_recog_text": "6", "glyph_recog_ld": 1.0}, {"polygon": [[160, 310], [168, 344], [193, 345], [190, 304]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[135, 256], [144, 294], [167, 288], [163, 255]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[133, 198], [141, 233], [172, 230], [172, 192]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[175, 149], [181, 186], [208, 182], [198, 145]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "一、", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[5, 257], [94, 256], [103, 198], [124, 166], [160, 121], [211, 93], [265, 81], [331, 94], [371, 124], [427, 207], [508, 179], [440, 67], [365, 22], [228, 5], [122, 40], [64, 92], [15, 166], [1, 258]], "text": "BRAINWASH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "ERAINWASH", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429332.jpg", "caption": "a calico cat laying on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429334.jpg", "caption": "a cat sitting on a table with carrots and a caption that says oh hai i went to farmer's market", "annotations": [{"polygon": [[264, 176], [264, 176], [314, 177], [314, 217], [264, 217]], "text": "OH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OH", "recog_valid": true, "glyph_recog_text": "OH", "glyph_recog_ld": 1.0}, {"polygon": [[323, 178], [383, 177], [384, 216], [322, 217]], "text": "HAI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAI", "recog_valid": true, "glyph_recog_text": "HAI", "glyph_recog_ld": 1.0}, {"polygon": [[150, 338], [269, 339], [270, 369], [150, 369]], "text": "FARMERZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FARMERZ", "recog_valid": true, "glyph_recog_text": "FARMERZ", "glyph_recog_ld": 1.0}, {"polygon": [[11, 385], [118, 386], [114, 414], [11, 415]], "text": "MARKIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "MARKIT", "recog_valid": true, "glyph_recog_text": "MARKIT", "glyph_recog_ld": 1.0}, {"polygon": [[26, 338], [104, 338], [105, 348], [101, 370], [29, 369]], "text": "WENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WENT", "recog_valid": true, "glyph_recog_text": "WENT", "glyph_recog_ld": 1.0}, {"polygon": [[109, 340], [114, 370], [144, 369], [143, 339]], "text": "TO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TO", "recog_valid": true, "glyph_recog_text": "TO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429335.jpg", "caption": "hot and tasty - food and drink video", "annotations": [{"polygon": [[196, 46], [198, 43], [200, 42], [238, 42], [308, 46], [316, 46], [317, 83], [194, 84]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Hot", "recog_valid": false, "glyph_recog_text": "HOT", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[215, 110], [221, 107], [261, 107], [277, 107], [285, 102], [290, 99], [297, 99], [298, 124], [301, 128], [290, 131], [214, 131], [211, 126], [213, 113]], "text": "and", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "and", "recog_valid": true, "glyph_recog_text": "and", "glyph_recog_ld": 1.0}, {"polygon": [[179, 141], [208, 141], [293, 146], [339, 154], [341, 156], [339, 160], [337, 162], [334, 163], [331, 174], [326, 185], [320, 192], [311, 192], [306, 190], [304, 184], [291, 183], [253, 182], [181, 182], [180, 180], [171, 154], [170, 149], [174, 143]], "text": "TASTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tasty", "recog_valid": false, "glyph_recog_text": "TASTY", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560409.jpg", "caption": "a kitchen with a granite counter top and a chandelier", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298269.jpg", "caption": "a small airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429353.jpg", "caption": "a trolley car is parked in front of a large hotel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298303.jpg", "caption": "a large military plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298306.jpg", "caption": "three young people posing for a photo on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560459.jpg", "caption": "an airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429399.jpg", "caption": "a train is pulling into a station with a clock on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298347.jpg", "caption": "a picture of a sandwich", "annotations": [{"polygon": [[99, 133], [99, 133], [149, 139], [152, 116], [97, 108], [96, 133]], "text": "FONG", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "FONG", "recog_valid": true, "glyph_recog_text": "FONG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036218.jpg", "caption": "a man in cowboy hat and jeans is trying to pull a cow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167295.jpg", "caption": "a man sitting at a table with a pizza and a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560517.jpg", "caption": "a plate with green vegetables and sauce on it", "annotations": [{"polygon": [[396, 239], [393, 253], [403, 260], [415, 267], [426, 271], [440, 276], [456, 281], [458, 267], [448, 263], [434, 259], [412, 248]], "text": "Ngizhigu", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Naizhigu", "recog_valid": false, "glyph_recog_text": "Ngizhigu", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560525.jpg", "caption": "a man laying on a bed with a book and a book", "annotations": [{"polygon": [[168, 268], [187, 269], [214, 293], [238, 288], [232, 271], [212, 255], [190, 258]], "text": "PHANTOY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "PHANTOY", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036237.jpg", "caption": "a horse pulling a carriage on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560532.jpg", "caption": "a hot dog and soda on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167330.jpg", "caption": "a stop sign with a street sign above it", "annotations": [{"polygon": [[226, 230], [216, 289], [343, 338], [345, 280]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[154, 216], [220, 193], [218, 209], [151, 237]], "text": "Electric", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Electric", "recog_valid": true, "glyph_recog_text": "Electric", "glyph_recog_ld": 1.0}, {"polygon": [[252, 382], [278, 391], [285, 419], [245, 408]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ALL", "recog_valid": true, "glyph_recog_text": "ALL", "glyph_recog_ld": 1.0}, {"polygon": [[294, 396], [336, 406], [338, 429], [298, 422]], "text": "WAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WAR", "recog_valid": true, "glyph_recog_text": "WAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298418.jpg", "caption": "three people posing for a photo with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167354.jpg", "caption": "a stop sign on a street corner with a building in the background", "annotations": [{"polygon": [[220, 229], [216, 265], [222, 267], [270, 266], [276, 266], [280, 236], [279, 225], [278, 224], [254, 226], [221, 229]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560571.jpg", "caption": "a red double decker bus with a large eye on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167355.jpg", "caption": "a desk with a computer and a chair in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429514.jpg", "caption": "a laptop computer and a cell phone on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167375.jpg", "caption": "a man and a woman holding tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429521.jpg", "caption": "a traffic light with no turn signal on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298458.jpg", "caption": "a group of children playing wii in a library", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560615.jpg", "caption": "police officers on motorcycles in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298468.jpg", "caption": "a wooden boat docked at a dock with other boats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167411.jpg", "caption": "a man is picking apples from a box", "annotations": [{"polygon": [[427, 441], [429, 469], [511, 467], [506, 439], [428, 441]], "text": "WENK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "XN3M", "recog_valid": false, "glyph_recog_text": "WENK", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429559.jpg", "caption": "espresso machine espresso coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429585.jpg", "caption": "a black and white photo of a park with a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036373.jpg", "caption": "a boy in a baseball uniform is about to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429606.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298545.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[133, 234], [135, 265], [82, 268], [82, 238]], "text": "TEC", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ITEC", "recog_valid": false, "glyph_recog_text": "TEC", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560691.jpg", "caption": "a group of people in a restaurant with food", "annotations": [{"polygon": [[230, 115], [228, 137], [245, 134], [302, 124], [303, 95], [266, 104]], "text": "kitchen", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Kirheu", "recog_valid": false, "glyph_recog_text": "kitchen", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167480.jpg", "caption": "a refrigerator on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036417.jpg", "caption": "a banana with writing on it", "annotations": [{"polygon": [[103, 303], [121, 309], [131, 310], [140, 315], [169, 322], [222, 323], [205, 350], [143, 346], [132, 341], [89, 330], [82, 317], [90, 304]], "text": "CECIN'EST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CECI N'EST", "recog_valid": false, "glyph_recog_text": "CECIN'EST", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[226, 323], [260, 323], [299, 315], [310, 317], [318, 339], [278, 342], [258, 350], [220, 348]], "text": "PASUNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PAS UNE", "recog_valid": false, "glyph_recog_text": "PASUNE", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[324, 314], [422, 285], [433, 295], [427, 311], [398, 328], [375, 331], [326, 341]], "text": "BANANA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BANA人A", "recog_valid": false, "glyph_recog_text": "BANANA", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167489.jpg", "caption": "a man wearing a blue shirt and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560713.jpg", "caption": "a boat is docked at a dock with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036439.jpg", "caption": "a young man sitting on the floor with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429658.jpg", "caption": "a man on a bike is riding past a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298588.jpg", "caption": "a train on the tracks with a blue and yellow train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429672.jpg", "caption": "a large white truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560754.jpg", "caption": "a green grassy area with a soccer ball and luggage", "annotations": [{"polygon": [[306, 427], [295, 449], [399, 484], [406, 460]], "text": "Global", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Slobal", "recog_valid": false, "glyph_recog_text": "Global", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[420, 466], [413, 490], [448, 500], [449, 477]], "text": "EX", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "EX", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[396, 293], [381, 343], [380, 346], [448, 363], [450, 303]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "T", "recog_valid": true, "glyph_recog_text": "T", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036469.jpg", "caption": "a man in a white shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429691.jpg", "caption": "a squirrel statue", "annotations": [{"polygon": [[202, 228], [341, 137], [325, 89], [184, 190]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "non", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[352, 98], [373, 84], [427, 86], [361, 123]], "text": "54", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "540", "recog_valid": false, "glyph_recog_text": "5 4", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[213, 122], [221, 177], [335, 251], [332, 203]], "text": "Rice", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Rice", "recog_valid": true, "glyph_recog_text": "RTce", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036482.jpg", "caption": "a man in military uniform standing next to a bike", "annotations": [{"polygon": [[256, 441], [257, 451], [307, 407], [333, 379], [329, 372], [303, 398], [282, 417]], "text": "CANNONDALE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CAODOODALE", "recog_valid": false, "glyph_recog_text": "GANNOHDALE", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429705.jpg", "caption": "a young boy playing with a large ball in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429709.jpg", "caption": "a cow is looking over the fence at the camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429715.jpg", "caption": "a pair of scissors hanging on a wooden rack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036500.jpg", "caption": "a small airplane on a rocky beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429735.jpg", "caption": "a computer desk with a monitor and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429746.jpg", "caption": "a person laying in bed reading a book", "annotations": [{"polygon": [[327, 184], [337, 177], [371, 235], [361, 243]], "text": "WEIRDOS FROM ANOTHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WEIRDOS", "recog_valid": false, "glyph_recog_text": "thedstera/erylo", "glyph_recog_ld": 6.666662222265529e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036533.jpg", "caption": "a large jetliner flying through the air", "annotations": [{"polygon": [[402, 431], [402, 460], [500, 458], [500, 431], [404, 429]], "text": "GEEK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "GEEK", "recog_valid": true, "glyph_recog_text": "GEEK", "glyph_recog_ld": 1.0}, {"polygon": [[419, 463], [418, 497], [500, 497], [502, 463], [422, 463]], "text": "NET", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "NET", "recog_valid": true, "glyph_recog_text": "NET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298692.jpg", "caption": "a little girl getting on a school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429769.jpg", "caption": "a parade of people in a parade with a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298711.jpg", "caption": "a large pile of oranges", "annotations": [{"polygon": [[187, 200], [217, 194], [237, 196], [243, 214], [226, 228], [190, 229]], "text": "0.95", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "095", "recog_valid": false, "glyph_recog_text": "0.95", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167642.jpg", "caption": "a kitchen with a sink and a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167644.jpg", "caption": "a man standing next to a row of motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560885.jpg", "caption": "three people riding horses on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167675.jpg", "caption": "a woman looking at her cell phone while standing on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560898.jpg", "caption": "a bunch of orange and white umbrellas", "annotations": [{"polygon": [[176, 89], [188, 87], [206, 121], [195, 122]], "text": "SOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SCDS", "recog_valid": false, "glyph_recog_text": "803", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[365, 193], [367, 198], [407, 165], [406, 160]], "text": "LAROCHE.POSAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AROARTOAI LOPNN", "recog_valid": false, "glyph_recog_text": "at", "glyph_recog_ld": 6.666662222265529e-07}, {"polygon": [[362, 188], [366, 193], [403, 162], [401, 156]], "text": "LAROCHE-POSAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LAROCHE-POKA", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 8.333326388942908e-07}, {"polygon": [[72, 146], [64, 156], [87, 186], [98, 172]], "text": "SOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SOS", "recog_valid": true, "glyph_recog_text": "SOS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429829.jpg", "caption": "two women on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167734.jpg", "caption": "a man and a child on the beach with a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298810.jpg", "caption": "a red fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429892.jpg", "caption": "a television mounted on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560969.jpg", "caption": "a young boy eating a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560978.jpg", "caption": "a family posing with a television and a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298840.jpg", "caption": "a close up of a control panel with gauges and knobs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560989.jpg", "caption": "a sign for big mama's burritos hangs from the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298848.jpg", "caption": "a scooter parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167795.jpg", "caption": "a group of people walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036725.jpg", "caption": "a large jet airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036736.jpg", "caption": "taos opens to snowboarding march 18, 2009", "annotations": [{"polygon": [[76, 298], [76, 298], [164, 286], [169, 313], [85, 332]], "text": "Opens", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "6acns", "recog_valid": false, "glyph_recog_text": "Opens", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[223, 272], [223, 272], [228, 301], [228, 301], [433, 266], [427, 236]], "text": "TAOS SNOWBOAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Snowboerding", "recog_valid": false, "glyph_recog_text": "TAOS SNOWBOAR", "glyph_recog_ld": 7.692301774442356e-07}, {"polygon": [[165, 342], [165, 342], [245, 332], [240, 307], [160, 318]], "text": "March", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Merch", "recog_valid": false, "glyph_recog_text": "March", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[116, 212], [113, 256], [221, 258], [220, 212]], "text": "SKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOKI", "recog_valid": false, "glyph_recog_text": "SKI", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[230, 213], [245, 258], [394, 256], [392, 212]], "text": "TAOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAOS", "recog_valid": true, "glyph_recog_text": "TAOS", "glyph_recog_ld": 1.0}, {"polygon": [[302, 298], [306, 323], [367, 315], [364, 291]], "text": "2008", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2008", "recog_valid": true, "glyph_recog_text": "2008", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167813.jpg", "caption": "hot dog stick, los angeles, ca", "annotations": [{"polygon": [[92, 102], [117, 102], [120, 120], [135, 116], [153, 116], [147, 151], [140, 151], [123, 152], [92, 152]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HOT", "recog_valid": true, "glyph_recog_text": "HOT", "glyph_recog_ld": 1.0}, {"polygon": [[165, 99], [185, 97], [191, 109], [194, 115], [204, 112], [222, 111], [228, 119], [228, 142], [225, 146], [218, 148], [165, 150]], "text": "Doc", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Doc", "recog_valid": true, "glyph_recog_text": "Doc", "glyph_recog_ld": 1.0}, {"polygon": [[244, 95], [256, 92], [266, 104], [264, 109], [335, 105], [337, 144], [242, 146], [236, 134], [237, 104]], "text": "STICK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STICK", "recog_valid": true, "glyph_recog_text": "STICK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167819.jpg", "caption": "shell's new chief executive officer, ben van beurden, is a former head of the company's petrochemicals division", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036748.jpg", "caption": "a group of men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298906.jpg", "caption": "a man and a child riding a bike down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429980.jpg", "caption": "a model train is on the tracks near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298913.jpg", "caption": "a black and white photo of a zebra", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429991.jpg", "caption": "two skis with bindings on them", "annotations": [{"polygon": [[18, 339], [33, 308], [224, 250], [223, 273]], "text": "movement", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TAcIoeiny", "recog_valid": false, "glyph_recog_text": "movement", "glyph_recog_ld": 0.22222308641879285}, {"polygon": [[383, 168], [383, 178], [393, 176], [487, 142], [487, 131]], "text": "EXPLORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CXFLOORC", "recog_valid": false, "glyph_recog_text": "Exeie8e", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167851.jpg", "caption": "a red bus is parked next to a green bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167856.jpg", "caption": "a large airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167862.jpg", "caption": "a man on a skateboard doing a trick in a pool", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561101.jpg", "caption": "a man sitting at a table with red chairs and a boat in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167903.jpg", "caption": "a man in white playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036841.jpg", "caption": "a dorm room with a bed, desk and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430076.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561151.jpg", "caption": "a motorcycle parked in front of a red wall", "annotations": [{"polygon": [[262, 285], [299, 280], [314, 269], [317, 261], [312, 250], [253, 261], [246, 275], [250, 283]], "text": "Virago", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eVingd", "recog_valid": false, "glyph_recog_text": "Viragg", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[246, 254], [295, 242], [312, 241], [289, 295]], "text": "Vinago", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Vinag", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299010.jpg", "caption": "a street sign with a one way street sign on it", "annotations": [{"polygon": [[215, 162], [215, 198], [303, 184], [297, 148], [215, 162]], "text": "ONE ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": false, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[357, 136], [366, 170], [447, 155], [451, 119], [358, 135]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[66, 191], [67, 228], [187, 256], [179, 216], [68, 190]], "text": "NIKOLA ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NIKOLA", "recog_valid": false, "glyph_recog_text": "NIKOLA", "glyph_recog_ld": 1.0}, {"polygon": [[204, 222], [203, 258], [295, 279], [288, 242], [205, 223]], "text": "TESLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TESLA", "recog_valid": true, "glyph_recog_text": "TESLA", "glyph_recog_ld": 1.0}, {"polygon": [[151, 252], [152, 273], [226, 288], [223, 268], [152, 253]], "text": "CORNER ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CORNER", "recog_valid": false, "glyph_recog_text": "CORNER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430094.jpg", "caption": "a yellow bus and a gas pump", "annotations": [{"polygon": [[449, 237], [445, 289], [496, 288], [502, 249]], "text": "BOI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "30斤", "recog_valid": false, "glyph_recog_text": "BOI", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[451, 208], [450, 236], [466, 243], [493, 247], [500, 245], [502, 210], [470, 208]], "text": "CAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CAS", "recog_valid": true, "glyph_recog_text": "CAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299024.jpg", "caption": "a stop sign and a street sign", "annotations": [{"polygon": [[44, 180], [34, 255], [148, 264], [154, 194]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[257, 221], [259, 259], [357, 264], [356, 227]], "text": "David", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "David", "recog_valid": true, "glyph_recog_text": "David", "glyph_recog_ld": 1.0}, {"polygon": [[374, 230], [377, 267], [414, 268], [412, 232]], "text": "Rd", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Rd", "recog_valid": true, "glyph_recog_text": "Rd", "glyph_recog_ld": 1.0}, {"polygon": [[191, 328], [196, 379], [315, 377], [308, 333]], "text": "Copus", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Copus", "recog_valid": true, "glyph_recog_text": "Copus", "glyph_recog_ld": 1.0}, {"polygon": [[330, 331], [332, 370], [374, 368], [371, 328]], "text": "Rd", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Rd", "recog_valid": true, "glyph_recog_text": "Rd", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430097.jpg", "caption": "a large military plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299026.jpg", "caption": "a man riding a bicycle on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561190.jpg", "caption": "a group of men playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299050.jpg", "caption": "a military jet flying through the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036907.jpg", "caption": "a crowd of people watching a small plane fly over", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299051.jpg", "caption": "1936 chevrolet 3100 pickup truck - image 1 of 6", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167996.jpg", "caption": "a blue bus driving down a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430157.jpg", "caption": "a man feeding a cow with a child in a stroller", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036948.jpg", "caption": "two people riding snowmobiles on a snowy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036950.jpg", "caption": "a bus driving down a road with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299111.jpg", "caption": "a green and yellow airplane on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299115.jpg", "caption": "a collage of pictures of a dog sitting on a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299122.jpg", "caption": "two children sitting at a table with a large pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299134.jpg", "caption": "a man riding a horse down a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430215.jpg", "caption": "a man eating breakfast at a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430220.jpg", "caption": "a large airplane sitting on top of a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037012.jpg", "caption": "a woman walking down a road next to a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168084.jpg", "caption": "a bench sitting on a wet beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037015.jpg", "caption": "a large display case with a lot of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561300.jpg", "caption": "a bag with a cell phone, keys, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168090.jpg", "caption": "a group of boys playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168097.jpg", "caption": "a baseball game is in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037032.jpg", "caption": "a group of kids playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037035.jpg", "caption": "a kitchen with a stove, oven and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299180.jpg", "caption": "a black cat laying on a box of pizza", "annotations": [{"polygon": [[111, 359], [107, 381], [126, 400], [151, 423], [155, 406]], "text": "Dizza", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Dean", "recog_valid": false, "glyph_recog_text": "Dizza", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561324.jpg", "caption": "a woman holding a giraffe balloon", "annotations": [{"polygon": [[156, 258], [163, 212], [192, 217], [284, 241], [264, 286]], "text": "giraffe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "gir affe", "recog_valid": false, "glyph_recog_text": "giraffe", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037039.jpg", "caption": "a woman cutting a cake at a table", "annotations": [{"polygon": [[336, 357], [384, 375], [374, 398], [326, 376]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430260.jpg", "caption": "a fire hydrant sitting on the side of a sidewalk", "annotations": [{"polygon": [[214, 392], [309, 392], [308, 425], [214, 428], [218, 394]], "text": "700", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "800", "recog_valid": false, "glyph_recog_text": "700", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[499, 424], [345, 425], [335, 413], [345, 410], [349, 420], [356, 419], [358, 406], [358, 395], [354, 393], [349, 395], [345, 395], [344, 388], [449, 392], [499, 394], [499, 424]], "text": "JONES", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "UONES", "recog_valid": false, "glyph_recog_text": "JONES", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037062.jpg", "caption": "a group of people standing around a truck", "annotations": [{"polygon": [[56, 184], [92, 180], [91, 219], [56, 220]], "text": "Budgt", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Badget", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561352.jpg", "caption": "a group of people on motorcycles in a busy city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037104.jpg", "caption": "a silver train car sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430325.jpg", "caption": "a stop sign with a blue sky in the background", "annotations": [{"polygon": [[252, 318], [242, 385], [432, 370], [424, 305]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430335.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561407.jpg", "caption": "a young boy in a baseball uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037123.jpg", "caption": "a boat is docked at a dock at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430338.jpg", "caption": "a bag with a camera, passport, passport cover, passport, passport cover, passport cover, passport cover, passport cover, passport cover, passport cover, passport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299270.jpg", "caption": "a horse pulling a cart down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430343.jpg", "caption": "a motorcycle parked in a garage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430346.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[361, 299], [369, 293], [411, 297], [477, 313], [496, 323], [478, 343], [457, 337], [437, 328], [409, 322], [394, 317], [368, 316]], "text": "SKAFESKUFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "3AH8-S4Fe", "recog_valid": false, "glyph_recog_text": "SKAFESKUFE", "glyph_recog_ld": 0.20000079999919995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037140.jpg", "caption": "a man and a woman posing with tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037157.jpg", "caption": "a black and white photo of a bag with scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037169.jpg", "caption": "a red bus driving down a street next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561464.jpg", "caption": "people standing around a bar with wine bottles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430396.jpg", "caption": "a bowl of lemons and bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561468.jpg", "caption": "a black bear crossing the road near a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037198.jpg", "caption": "a woman is working on a project in her office", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299343.jpg", "caption": "a bus driving down a road with trees and bushes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168287.jpg", "caption": "a woman standing in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168296.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299382.jpg", "caption": "a sign that says gladiator street", "annotations": [{"polygon": [[88, 180], [264, 209], [261, 240], [88, 207]], "text": "GLADIATOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GLADIATOR", "recog_valid": true, "glyph_recog_text": "GLADIATOR", "glyph_recog_ld": 1.0}, {"polygon": [[274, 211], [433, 236], [429, 273], [273, 243]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561527.jpg", "caption": "a train car sitting on the tracks in a desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430472.jpg", "caption": "a traffic light and a sign on a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430476.jpg", "caption": "a shop with umbrellas and other items on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561551.jpg", "caption": "a man is loading a boat on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168335.jpg", "caption": "a snowboarder is doing a trick on a mountain", "annotations": [{"polygon": [[230, 310], [245, 329], [214, 347], [197, 354], [188, 338]], "text": "RIOE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2018", "recog_valid": false, "glyph_recog_text": "RIOE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037282.jpg", "caption": "a man and woman cutting a wedding cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037286.jpg", "caption": "a group of people standing around a table with bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299433.jpg", "caption": "an air canada airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299441.jpg", "caption": "a street sign with a directional arrow pointing to a building", "annotations": [{"polygon": [[159, 391], [159, 423], [208, 420], [206, 389]], "text": "den", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "den", "recog_valid": true, "glyph_recog_text": "den", "glyph_recog_ld": 1.0}, {"polygon": [[142, 109], [309, 107], [308, 142], [142, 136]], "text": "Reichstag", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Reichstag", "recog_valid": true, "glyph_recog_text": "Reichstag", "glyph_recog_ld": 1.0}, {"polygon": [[137, 202], [137, 202], [321, 201], [320, 236], [136, 230]], "text": "Kreuzberg", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Kreuzberg", "recog_valid": true, "glyph_recog_text": "Kreuzberg", "glyph_recog_ld": 1.0}, {"polygon": [[123, 330], [123, 330], [311, 328], [311, 300], [122, 300]], "text": "Bundesrat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bundesrat", "recog_valid": true, "glyph_recog_text": "Bundesrat", "glyph_recog_ld": 1.0}, {"polygon": [[85, 375], [226, 373], [230, 351], [232, 343], [84, 343]], "text": "Potsdamer", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Potsdamer", "recog_valid": true, "glyph_recog_text": "Potsdamer", "glyph_recog_ld": 1.0}, {"polygon": [[222, 390], [222, 421], [314, 419], [310, 389]], "text": "Linden", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Linden", "recog_valid": true, "glyph_recog_text": "Linden", "glyph_recog_ld": 1.0}, {"polygon": [[70, 391], [68, 423], [142, 421], [143, 390]], "text": "Unter", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Unter", "recog_valid": true, "glyph_recog_text": "Unter", "glyph_recog_ld": 1.0}, {"polygon": [[246, 344], [244, 374], [312, 373], [311, 343]], "text": "Platz", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Platz", "recog_valid": true, "glyph_recog_text": "Platz", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299443.jpg", "caption": "a person riding a dirt bike in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168387.jpg", "caption": "a cat sitting on a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430531.jpg", "caption": "two black containers with rice and meat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299466.jpg", "caption": "a plate with an egg and a glass of orange juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561624.jpg", "caption": "a boy swinging a baseball bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168413.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561635.jpg", "caption": "a book with a picture of a galaxy", "annotations": [{"polygon": [[195, 89], [196, 188], [233, 189], [224, 87]], "text": "pective", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aAinpad", "recog_valid": false, "glyph_recog_text": "Q.o.", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[400, 420], [400, 465], [424, 461], [507, 428], [508, 416], [494, 396]], "text": "Canlo", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Caw", "recog_valid": false, "glyph_recog_text": "Canlo", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[182, 207], [183, 423], [149, 413], [151, 201]], "text": "Astronom", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ouoinse", "recog_valid": false, "glyph_recog_text": "Cnwloco", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168422.jpg", "caption": "a plate of food", "annotations": [{"polygon": [[80, 205], [80, 205], [99, 202], [80, 127], [61, 130]], "text": "smart", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "news", "recog_valid": false, "glyph_recog_text": "smart", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168430.jpg", "caption": "a red and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037358.jpg", "caption": "a statue of a man on a horse is in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561648.jpg", "caption": "a street sign with a street name on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430577.jpg", "caption": "a young man is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037377.jpg", "caption": "a bunch of bananas with stickers on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430617.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037401.jpg", "caption": "a person riding a dirt bike on a muddy trail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168475.jpg", "caption": "a couple of airplanes parked on a snowy runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430621.jpg", "caption": "a baseball player is standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561713.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[143, 185], [174, 184], [173, 211], [145, 215], [141, 191]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037429.jpg", "caption": "a man and a woman holding wine glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168505.jpg", "caption": "a man sitting on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168506.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430654.jpg", "caption": "a man flying a red kite on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561731.jpg", "caption": "a boy eating cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430660.jpg", "caption": "a group of skiers are racing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561735.jpg", "caption": "a street with two traffic lights and a crosswalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168521.jpg", "caption": "a black and white photo of people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430692.jpg", "caption": "a variety of items including a wallet, cell phone, and other items", "annotations": [{"polygon": [[339, 373], [339, 373], [346, 378], [376, 346], [369, 340], [338, 373]], "text": "tokidok", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "tokidok", "recog_valid": true, "glyph_recog_text": "5p319uk", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299648.jpg", "caption": "a crowd of people at a concert with their hands up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561789.jpg", "caption": "a red double decker bus parked next to a car", "annotations": [{"polygon": [[293, 269], [299, 356], [334, 356], [373, 336], [401, 342], [428, 333], [425, 266]], "text": "CO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C&", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299657.jpg", "caption": "a red train car with a sign that says port amarilse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299679.jpg", "caption": "a baseball player on the field with a glove", "annotations": [{"polygon": [[76, -1], [118, 1], [119, 41], [78, 41]], "text": "IN", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "IN", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[42, 49], [150, 47], [148, 91], [106, 92], [41, 91]], "text": "PAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "PARAI", "recog_valid": false, "glyph_recog_text": "PAR", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[236, 9], [353, 8], [353, 39], [237, 37]], "text": "POPPED", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "POPPED", "recog_valid": true, "glyph_recog_text": "POPPED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168618.jpg", "caption": "the clock in the lobby of the state house in baltimore", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299704.jpg", "caption": "a clock on a pole next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561861.jpg", "caption": "two different pictures of a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561864.jpg", "caption": "a tennis player is about to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561885.jpg", "caption": "a double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430818.jpg", "caption": "a cat laying on a carpet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561894.jpg", "caption": "a kitchen with a refrigerator and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561917.jpg", "caption": "a street with cars and people at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299780.jpg", "caption": "a red and white stop sign", "annotations": [{"polygon": [[389, 301], [412, 263], [408, 234], [251, 238], [247, 295], [264, 306]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S10P", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168717.jpg", "caption": "the temple bar, dublin", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561947.jpg", "caption": "a group of people standing around a table with a variety of pastries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430889.jpg", "caption": "a computer desk with a laptop and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430890.jpg", "caption": "a delta airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168754.jpg", "caption": "a pan with vegetables and a bowl of soup on the stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168763.jpg", "caption": "two men on horses", "annotations": [{"polygon": [[153, 171], [175, 137], [237, 115], [292, 138], [310, 167], [280, 187], [251, 169], [219, 170], [206, 179]], "text": "PUEBLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "%", "recog_valid": false, "glyph_recog_text": "PUEBLO", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[202, 185], [197, 198], [209, 226], [234, 237], [259, 228], [270, 203], [264, 186], [249, 169], [220, 169]], "text": "G", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "G", "recog_valid": true, "glyph_recog_text": "n", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430932.jpg", "caption": "a person holding a cell phone with a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037719.jpg", "caption": "a woman sitting at a table with a cake on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562014.jpg", "caption": "a woman in a business suit holding a tray of food", "annotations": [{"polygon": [[132, 42], [141, 18], [218, 36], [213, 59]], "text": "Menu", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Meny", "recog_valid": false, "glyph_recog_text": "Menu", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430950.jpg", "caption": "a man holding a knife and fork", "annotations": [{"polygon": [[134, 385], [172, 356], [216, 339], [259, 328], [332, 315], [398, 307], [410, 363], [345, 375], [298, 383], [239, 396], [195, 413], [169, 421]], "text": "FORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORELIA", "recog_valid": false, "glyph_recog_text": "FORE", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168828.jpg", "caption": "a man riding an elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430974.jpg", "caption": "a pair of skis on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037771.jpg", "caption": "a man wearing a red bus helmet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168845.jpg", "caption": "a young boy holding skis and poles in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299920.jpg", "caption": "a group of people skateboarding in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299924.jpg", "caption": "htc one x review", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299931.jpg", "caption": "a man in a red shirt is throwing a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431009.jpg", "caption": "a desk with two laptops and a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431012.jpg", "caption": "two motorcycles racing on a track", "annotations": [{"polygon": [[194, 176], [194, 176], [351, 181], [342, 243], [185, 236]], "text": "Hertz", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hertz", "recog_valid": true, "glyph_recog_text": "Hertz", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562092.jpg", "caption": "a cow with a tag on its ear standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431025.jpg", "caption": "a clock in a building with statues on the walls", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562106.jpg", "caption": "a green motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299962.jpg", "caption": "a person flying a kite in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037826.jpg", "caption": "a man holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168903.jpg", "caption": "a busy street with many signs and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168900.jpg", "caption": "a truck driving down the street with people standing behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168905.jpg", "caption": "a street sign with the words future street and here", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562138.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300028.jpg", "caption": "a man walking across a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562174.jpg", "caption": "a man on a skateboard riding around cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431115.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431133.jpg", "caption": "a cow is standing in the middle of a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168993.jpg", "caption": "a green double decker bus", "annotations": [{"polygon": [[128, 252], [159, 250], [163, 215], [131, 218]], "text": "Star", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Star", "recog_valid": true, "glyph_recog_text": "。", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[96, 261], [125, 260], [128, 219], [98, 225]], "text": "Eagle", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "营", "recog_valid": false, "glyph_recog_text": "w rs", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431139.jpg", "caption": "a red train with a large advertisement on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037925.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[169, 184], [203, 160], [208, 167], [173, 192]], "text": "WELCOMES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WOLCOM", "recog_valid": false, "glyph_recog_text": "PHCE1AG", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431154.jpg", "caption": "a bed with a teddy bear, a blanket and a pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562248.jpg", "caption": "a man holding a donut in front of him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562253.jpg", "caption": "a man in a suit and tie standing on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300111.jpg", "caption": "a group of people playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169040.jpg", "caption": "a truck with a crane on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169086.jpg", "caption": "a woman pushing a shopping cart through a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038017.jpg", "caption": "a man riding a surfboard on a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038022.jpg", "caption": "a green double decker bus", "annotations": [{"polygon": [[235, 149], [298, 161], [298, 185], [250, 176], [247, 185], [240, 189], [233, 183], [234, 159]], "text": "green", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "green", "recog_valid": true, "glyph_recog_text": "green", "glyph_recog_ld": 1.0}, {"polygon": [[310, 163], [368, 173], [369, 193], [364, 195], [309, 185]], "text": "rovers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rovers", "recog_valid": true, "glyph_recog_text": "rovers", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169095.jpg", "caption": "a man sitting on a bus with a laptop", "annotations": [{"polygon": [[471, 293], [494, 305], [493, 317], [489, 321], [478, 324], [478, 324], [469, 314], [457, 309], [462, 296]], "text": "exp", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "exp", "recog_valid": true, "glyph_recog_text": "exp", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038027.jpg", "caption": "a black and white photo of a motorcycle parked on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562325.jpg", "caption": "a black and white photo of a soccer game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169116.jpg", "caption": "a man and a woman are playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562341.jpg", "caption": "a kitchen counter with a microwave, a dishwasher, and a bowl of cereal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038079.jpg", "caption": "a bathroom with two sinks and a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169159.jpg", "caption": "a skateboarder in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169174.jpg", "caption": "a motorcycle parked on the street in a parking lot", "annotations": [{"polygon": [[333, 210], [353, 242], [323, 261], [307, 242]], "text": "111", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "111", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169192.jpg", "caption": "a girl in a red and white dress is holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169200.jpg", "caption": "a giraffe painted on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169211.jpg", "caption": "a bus is sitting in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431357.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169243.jpg", "caption": "a large clock with a blue face and a statue of a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562461.jpg", "caption": "a yellow and green fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562462.jpg", "caption": "a large clock on a building with roman numerals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431410.jpg", "caption": "a large bird sitting on top of a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038226.jpg", "caption": "a man sitting under a red umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169299.jpg", "caption": "a woman holding a tennis racket with a cardboard box on her back", "annotations": [{"polygon": [[155, 140], [146, 135], [183, 85], [194, 92]], "text": "PHOTOSHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PHOTOSHOP", "recog_valid": true, "glyph_recog_text": "PHOTOSHOP", "glyph_recog_ld": 1.0}, {"polygon": [[140, 177], [173, 136], [165, 130], [132, 172]], "text": "EXTENDED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EXTENDED", "recog_valid": true, "glyph_recog_text": "教", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300374.jpg", "caption": "a city street with bicycles and people at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300383.jpg", "caption": "a woman playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169322.jpg", "caption": "a plate of food on a table near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300399.jpg", "caption": "a man holding up a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431480.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[320, 198], [347, 210], [354, 182], [339, 171], [324, 164]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": false, "glyph_recog_text": "13", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169340.jpg", "caption": "firefighters march in a parade with flags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038274.jpg", "caption": "a guitar case and a bag are sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431492.jpg", "caption": "a blender with a glass jar filled with watermelon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169351.jpg", "caption": "a man riding a skateboard on a street at night", "annotations": [{"polygon": [[184, 188], [198, 192], [212, 194], [218, 195], [218, 208], [211, 218], [198, 217], [175, 213]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SO", "recog_valid": false, "glyph_recog_text": "30", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169352.jpg", "caption": "a laptop computer sitting on a desk next to a bowl of cereal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300428.jpg", "caption": "a street lamp is on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431504.jpg", "caption": "a piece of cake on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169360.jpg", "caption": "a car driving down a road with sheep in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038301.jpg", "caption": "a mailbox with graffiti on it", "annotations": [{"polygon": [[301, 57], [301, 87], [301, 90], [347, 66], [353, 42], [349, 30]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[246, 357], [255, 377], [275, 380], [297, 377], [297, 357], [267, 348]], "text": "TIBET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "TIBET", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169377.jpg", "caption": "a red motorcycle parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169395.jpg", "caption": "a man looking at a clock in a yard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431555.jpg", "caption": "a bus driving down a street with people walking on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431574.jpg", "caption": "an old postcard shows a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169448.jpg", "caption": "a row of food trucks parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562664.jpg", "caption": "a cake with a train on it", "annotations": [{"polygon": [[487, 163], [480, 172], [511, 197], [512, 183]], "text": "BIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BIG", "recog_valid": true, "glyph_recog_text": "810", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562675.jpg", "caption": "gingerbread heart at the christmas market in munich, germany", "annotations": [{"polygon": [[260, 256], [260, 253], [266, 249], [269, 255], [278, 252], [280, 252], [278, 247], [280, 245], [282, 250], [293, 244], [304, 238], [302, 234], [303, 232], [304, 231], [307, 238], [313, 236], [325, 233], [331, 230], [328, 225], [328, 223], [330, 222], [332, 223], [332, 226], [337, 236], [337, 230], [337, 229], [338, 227], [340, 227], [358, 221], [363, 220], [367, 219], [369, 223], [371, 227], [372, 229], [372, 229], [371, 230], [362, 231], [354, 234], [337, 241], [317, 247], [305, 251], [268, 265], [264, 265], [264, 262], [263, 261], [261, 260], [261, 259]], "text": "Schatzchen", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Schotzelnen", "recog_valid": false, "glyph_recog_text": "Schatzchen", "glyph_recog_ld": 0.6363639669418482}, {"polygon": [[276, 315], [278, 334], [291, 336], [306, 335], [309, 346], [314, 346], [314, 336], [388, 327], [386, 322], [379, 310]], "text": "St", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Suannaies", "recog_valid": false, "glyph_recog_text": "s t", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038389.jpg", "caption": "a street sign with a tree in the background", "annotations": [{"polygon": [[92, 217], [84, 269], [366, 255], [368, 204]], "text": "WAVELAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAVELAND", "recog_valid": true, "glyph_recog_text": "WAVELAND", "glyph_recog_ld": 1.0}, {"polygon": [[386, 243], [387, 212], [430, 211], [429, 245]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AV", "recog_valid": true, "glyph_recog_text": "AV", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562678.jpg", "caption": "a woman on a bike with a sign has texts", "annotations": [{"polygon": [[63, 103], [73, 126], [159, 95], [149, 71]], "text": "folding", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "folding", "recog_valid": true, "glyph_recog_text": "folding", "glyph_recog_ld": 1.0}, {"polygon": [[59, 136], [68, 154], [128, 130], [119, 112]], "text": "tricia", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "tricia", "recog_valid": true, "glyph_recog_text": "tricia", "glyph_recog_ld": 1.0}, {"polygon": [[129, 110], [137, 127], [188, 107], [181, 88]], "text": "wolf", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "wolf", "recog_valid": true, "glyph_recog_text": "wolf", "glyph_recog_ld": 1.0}, {"polygon": [[49, 138], [58, 157], [8, 177], [1, 157]], "text": "bike", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "bike", "recog_valid": true, "glyph_recog_text": "bike", "glyph_recog_ld": 1.0}, {"polygon": [[15, 371], [4, 394], [4, 394], [114, 416], [114, 416], [117, 393]], "text": "ATTACK!", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ATTACK", "recog_valid": false, "glyph_recog_text": "ATTACK!", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[426, 166], [434, 186], [508, 166], [500, 145]], "text": "GRRR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GRRR", "recog_valid": true, "glyph_recog_text": "GRRR", "glyph_recog_ld": 1.0}, {"polygon": [[59, 182], [59, 182], [63, 201], [187, 169], [180, 148]], "text": "pedestrians!", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "pedestrians", "recog_valid": false, "glyph_recog_text": "pedestrlans!", "glyph_recog_ld": 0.8333334722221064}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300537.jpg", "caption": "a car driving down a wet street with people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431625.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562712.jpg", "caption": "a black umbrella is sitting in a trash can", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169499.jpg", "caption": "a red truck driving down a street with a bus behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562721.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038435.jpg", "caption": "a bus stop sign", "annotations": [{"polygon": [[155, 203], [149, 250], [386, 245], [379, 200]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUS", "recog_valid": true, "glyph_recog_text": "B U S", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[125, 259], [110, 313], [426, 305], [418, 258]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562740.jpg", "caption": "a white plate topped with a slice of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169527.jpg", "caption": "a group of people standing on a beach with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431681.jpg", "caption": "a black cat laying on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431692.jpg", "caption": "a blue sign with writing on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300620.jpg", "caption": "a man sitting at a table with three laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300622.jpg", "caption": "a young girl in a suit and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562770.jpg", "caption": "a neon sign that says luggage", "annotations": [{"polygon": [[171, 259], [173, 283], [324, 268], [323, 245]], "text": "LUGGAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LUGGAGE", "recog_valid": true, "glyph_recog_text": "LUGGAGE", "glyph_recog_ld": 1.0}, {"polygon": [[401, 276], [400, 308], [432, 306], [430, 279]], "text": "La", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "L^", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[436, 274], [440, 305], [504, 302], [502, 271]], "text": "Flewr", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Rlevn", "recog_valid": false, "glyph_recog_text": "Flewr", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[227, 401], [227, 433], [362, 445], [360, 418]], "text": "LUGGAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LUGGAGE", "recog_valid": true, "glyph_recog_text": "LUGGAGE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038487.jpg", "caption": "a horse with its mouth open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169589.jpg", "caption": "a street sign with a blue and white sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300661.jpg", "caption": "a miniature model of a sailboat with figurines on it", "annotations": [{"polygon": [[0, 348], [-1, 381], [94, 374], [91, 352]], "text": "inns", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "inns", "recog_valid": true, "glyph_recog_text": "inns", "glyph_recog_ld": 1.0}, {"polygon": [[0, 393], [-1, 427], [38, 422], [37, 397]], "text": "ta", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ta", "recog_valid": true, "glyph_recog_text": "ta", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562819.jpg", "caption": "a clock in a mall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038532.jpg", "caption": "a boy sitting on the curb", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431750.jpg", "caption": "a red and green bed with a quilt and pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562835.jpg", "caption": "a man standing in the kitchen with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038548.jpg", "caption": "a man throwing a frisbee", "annotations": [{"polygon": [[134, 333], [149, 328], [163, 350], [177, 358], [197, 357], [210, 350], [217, 340], [220, 318], [234, 317], [233, 329], [233, 344], [225, 358], [213, 367], [198, 375], [170, 372], [155, 361], [143, 349]], "text": "SPEEDROCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "速动", "recog_valid": false, "glyph_recog_text": "SPEEDROCK", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[193, 260], [207, 266], [215, 272], [228, 290], [217, 295], [208, 284], [198, 278], [191, 275]], "text": "PAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QAV", "recog_valid": false, "glyph_recog_text": "PAN", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038558.jpg", "caption": "a busy street with many cars and motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300704.jpg", "caption": "a large airplane parked on the tarmac", "annotations": [{"polygon": [[101, 131], [126, 136], [132, 174], [93, 172]], "text": "AA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AA", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169640.jpg", "caption": "a bus stop with a sign that says 11", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562862.jpg", "caption": "a black car parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300721.jpg", "caption": "a man in a suit and tie cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431795.jpg", "caption": "a parking meter on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169656.jpg", "caption": "a couple of buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038589.jpg", "caption": "a person skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431812.jpg", "caption": "a stop sign on a street with a car parked in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300748.jpg", "caption": "a man on a skateboard is riding on a ramp", "annotations": [{"polygon": [[390, 263], [395, 287], [513, 285], [513, 253]], "text": "TEXTIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "TEXTIL", "recog_valid": true, "glyph_recog_text": "TEXTIL", "glyph_recog_ld": 1.0}, {"polygon": [[389, 209], [388, 258], [449, 258], [512, 247], [512, 190]], "text": "MONKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MONKE", "recog_valid": true, "glyph_recog_text": "MONKE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431824.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[291, 197], [263, 184], [277, 160], [300, 169]], "text": "14", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "14", "recog_valid": true, "glyph_recog_text": "14", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300754.jpg", "caption": "a blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038614.jpg", "caption": "a train traveling down the tracks near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038616.jpg", "caption": "a dog laying on a couch next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562911.jpg", "caption": "a helicopter is parked on the grass next to a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300772.jpg", "caption": "a young boy swinging a baseball bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169722.jpg", "caption": "a metal plate with the name philadelphia on it", "annotations": [{"polygon": [[186, 88], [186, 88], [189, 103], [202, 100], [219, 99], [232, 100], [253, 105], [278, 118], [293, 133], [309, 150], [315, 163], [329, 156], [326, 144], [312, 124], [298, 111], [274, 94], [248, 86], [233, 83], [214, 82], [186, 87]], "text": "LOCOMOTIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OCOMOT", "recog_valid": false, "glyph_recog_text": "LOCOMOTIVE", "glyph_recog_ld": 0.6000003999996}, {"polygon": [[125, 197], [122, 243], [304, 247], [304, 196], [125, 198]], "text": "57994", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "57994", "recog_valid": true, "glyph_recog_text": "57994", "glyph_recog_ld": 1.0}, {"polygon": [[131, 299], [123, 313], [140, 328], [168, 345], [209, 353], [254, 347], [261, 342], [316, 302], [317, 299], [307, 289], [289, 304], [265, 321], [244, 332], [214, 337], [186, 334], [157, 320]], "text": "NOVEMBER,1825", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "NOVEMBER, 1825", "glyph_recog_ld": 7.142852040953329e-07}, {"polygon": [[175, 108], [138, 134], [126, 148], [107, 181], [103, 200], [88, 196], [90, 181], [97, 168], [105, 148], [115, 133], [138, 112], [146, 103], [169, 92]], "text": "BALDWIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BA1U1227", "recog_valid": false, "glyph_recog_text": "BALDWIN", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169725.jpg", "caption": "a giraffe in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038663.jpg", "caption": "two men working on a yellow motorcycle in a garage", "annotations": [{"polygon": [[14, 391], [32, 418], [490, 133], [486, 96], [15, 389]], "text": "REDSHIFT-DECOY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RENSHFTDECO", "recog_valid": false, "glyph_recog_text": "REDSHIFT-DECOY", "glyph_recog_ld": 0.7142859183672012}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300811.jpg", "caption": "a group of people sitting in a classroom watching a video", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300815.jpg", "caption": "a stop sign with a sign that says eating animals", "annotations": [{"polygon": [[114, 24], [114, 83], [285, 108], [289, 55]], "text": "McNAMEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MCNAMEE", "recog_valid": false, "glyph_recog_text": "McNAMEE", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[337, 66], [337, 117], [373, 123], [375, 72]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "0H", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[173, 167], [175, 220], [274, 170], [274, 124], [271, 118]], "text": "PICKARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PICKARD", "recog_valid": true, "glyph_recog_text": "PICKARD", "glyph_recog_ld": 1.0}, {"polygon": [[298, 100], [295, 163], [347, 139], [343, 80]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "学", "recog_valid": false, "glyph_recog_text": "<>", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[158, 291], [160, 394], [354, 410], [358, 411], [364, 411], [364, 315]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[199, 407], [205, 443], [333, 445], [328, 410]], "text": "EATING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "EATIG", "recog_valid": false, "glyph_recog_text": "EATING", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[212, 444], [207, 472], [330, 476], [336, 444]], "text": "ANIMALS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ANIAALS", "recog_valid": false, "glyph_recog_text": "ANIMALS", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431899.jpg", "caption": "a group of people standing on the top of a purple bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431902.jpg", "caption": "a group of men sitting at a table with flags behind them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431904.jpg", "caption": "a man on a skateboard doing a trick on a street", "annotations": [{"polygon": [[151, 123], [181, 123], [175, 155], [147, 156]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "u", "recog_valid": false, "glyph_recog_text": "T", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169763.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300836.jpg", "caption": "a clock with a rhinoceros on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169774.jpg", "caption": "a woman sitting in a boat with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431921.jpg", "caption": "a street with many signs and cars driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431933.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038729.jpg", "caption": "a train traveling down the tracks in the desert", "annotations": [{"polygon": [[222, 186], [224, 215], [257, 222], [256, 201], [228, 186], [224, 187]], "text": "Santa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sania", "recog_valid": false, "glyph_recog_text": "Sanfa", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431948.jpg", "caption": "a shelf with various kitchen appliances on display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300887.jpg", "caption": "three jockeys on horses racing on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038747.jpg", "caption": "a line of luggage at an airport", "annotations": [{"polygon": [[360, 414], [352, 437], [370, 449], [413, 447]], "text": "Sou", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Sow", "recog_valid": false, "glyph_recog_text": "Sou", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038751.jpg", "caption": "an old red truck parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563048.jpg", "caption": "a display of stuffed animals in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431977.jpg", "caption": "a stuffed polar bear sitting next to a book, a box and a picture frame", "annotations": [{"polygon": [[46, 227], [52, 255], [79, 247], [105, 245], [128, 245], [124, 218]], "text": "Black", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Black", "recog_valid": true, "glyph_recog_text": "Black", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431984.jpg", "caption": "a school bus is parked in a lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431986.jpg", "caption": "a black and white boat is traveling through the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563058.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431991.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169854.jpg", "caption": "a little girl reading a book on a bed", "annotations": [{"polygon": [[-1, 377], [18, 361], [34, 371], [-2, 400]], "text": "GOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "000", "recog_valid": false, "glyph_recog_text": "300", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169878.jpg", "caption": "a vase of flowers on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300950.jpg", "caption": "a fruit and vegetable market with lots of different fruits and vegetables", "annotations": [{"polygon": [[238, 96], [238, 111], [346, 98], [344, 81]], "text": "MACEDONIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MACEDONIA", "recog_valid": true, "glyph_recog_text": "MACEDONIA", "glyph_recog_ld": 1.0}, {"polygon": [[347, 84], [364, 115], [377, 108], [359, 77]], "text": "IHEREI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "iHERE", "recog_valid": false, "glyph_recog_text": "HEREI", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169883.jpg", "caption": "a white refrigerator with writing on it", "annotations": [{"polygon": [[130, 80], [132, 120], [375, 114], [382, 93], [361, 77], [252, 76], [170, 79]], "text": "VOODOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "VOODOO", "recog_valid": true, "glyph_recog_text": "VOODOO", "glyph_recog_ld": 1.0}, {"polygon": [[133, 211], [132, 249], [263, 248], [313, 248], [342, 293], [364, 267], [369, 198], [313, 195], [248, 201], [160, 210]], "text": "TODAY HIRE NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TODAY", "recog_valid": false, "glyph_recog_text": "TOOAYIHBEHNON", "glyph_recog_ld": 0.30769284023627674}, {"polygon": [[193, 258], [184, 299], [241, 293], [273, 288], [275, 258], [230, 251]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HRE", "recog_valid": false, "glyph_recog_text": "HERE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[203, 297], [199, 316], [277, 317], [283, 283]], "text": "Now", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Roa", "recog_valid": false, "glyph_recog_text": "Now", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[220, 319], [213, 374], [220, 379], [247, 375], [257, 366], [258, 318]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563123.jpg", "caption": "three people sitting on a bench with skateboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038862.jpg", "caption": "a display of oranges in a market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563164.jpg", "caption": "a group of women sitting on a subway train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563168.jpg", "caption": "a stop sign with a flag on it", "annotations": [{"polygon": [[80, 247], [79, 277], [140, 286], [140, 259]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301039.jpg", "caption": "a keyboard and mouse are sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301042.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170012.jpg", "caption": "a large clock in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563233.jpg", "caption": "a stop sign and a street sign on a road", "annotations": [{"polygon": [[292, 242], [292, 242], [386, 229], [390, 234], [395, 272], [297, 285]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432170.jpg", "caption": "a group of men playing frisbee in a field", "annotations": [{"polygon": [[126, 234], [162, 236], [158, 268], [127, 264]], "text": "71", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7", "recog_valid": false, "glyph_recog_text": "71", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563257.jpg", "caption": "a person doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563270.jpg", "caption": "a man and a boy looking at a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301128.jpg", "caption": "a man and a woman are petting a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432212.jpg", "caption": "a cat is sniffing something on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432218.jpg", "caption": "ryanair boeing 737-800 ryanair", "annotations": [{"polygon": [[312, 260], [414, 242], [422, 264], [320, 283]], "text": "RYANAIR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RYANAIR", "recog_valid": true, "glyph_recog_text": "RYANAIR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170080.jpg", "caption": "a man carrying a surfboard on the beach", "annotations": [{"polygon": [[372, 208], [372, 218], [372, 232], [372, 245], [375, 259], [379, 273], [384, 283], [392, 295], [403, 288], [397, 282], [391, 272], [385, 259], [382, 245], [382, 235], [383, 225], [386, 210]], "text": "your mother", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Curmotm", "recog_valid": false, "glyph_recog_text": "your mother", "glyph_recog_ld": 0.45454595041277235}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301156.jpg", "caption": "a white truck with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563301.jpg", "caption": "a large yellow house with a clock on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301160.jpg", "caption": "a man is playing tennis on a grass court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301158.jpg", "caption": "a blue and white bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301171.jpg", "caption": "a group of cows in a field with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563356.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[284, 324], [291, 402], [298, 409], [314, 415], [318, 407], [302, 337]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[258, 274], [258, 274], [265, 301], [302, 328], [292, 294], [271, 278]], "text": "NEGRT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "NEGRT", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301218.jpg", "caption": "a truck driving down the road at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563364.jpg", "caption": "a group of people posing for a photo with food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039078.jpg", "caption": "a white bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301225.jpg", "caption": "a man riding a skateboard on a ramp inside a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563370.jpg", "caption": "two people on surfboards in the ocean near a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563376.jpg", "caption": "a man and horses pulling a cart with logs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039089.jpg", "caption": "a man playing a video game on a television", "annotations": [{"polygon": [[172, 104], [171, 133], [190, 126], [204, 122], [203, 103], [186, 106]], "text": "Penrrpp", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Polter", "recog_valid": false, "glyph_recog_text": "Penipe", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039099.jpg", "caption": "a woman sitting at a table with a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301247.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[178, 289], [202, 317], [266, 254], [245, 224], [190, 263], [178, 289]], "text": "terS 3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eios", "recog_valid": false, "glyph_recog_text": "terS 3", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039120.jpg", "caption": "a man in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432349.jpg", "caption": "a group of people standing around a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432363.jpg", "caption": "a parking meter with a coin slot and a coin", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301296.jpg", "caption": "a street sign that reads ventura fwy and a tree", "annotations": [{"polygon": [[338, 145], [339, 166], [396, 177], [403, 150]], "text": "Fwy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fwy", "recog_valid": true, "glyph_recog_text": "Fwy", "glyph_recog_ld": 1.0}, {"polygon": [[117, 313], [117, 313], [185, 315], [182, 356], [112, 353]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "35", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170235.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039163.jpg", "caption": "a copper pitcher and a small tree in a pot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563475.jpg", "caption": "a large group of people standing in a line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432420.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432424.jpg", "caption": "a group of people working on laptops in a workshop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432428.jpg", "caption": "a semi truck with a trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563511.jpg", "caption": "a sign that says photo enforced on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563514.jpg", "caption": "a yellow bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301381.jpg", "caption": "a living room with a television and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432460.jpg", "caption": "a vase with a branch in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432467.jpg", "caption": "a cat is sitting on a scooter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301419.jpg", "caption": "a small white airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563593.jpg", "caption": "a bus driving down a street with cars parked on both sides", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432527.jpg", "caption": "a man swinging a tennis racket at a ball on a tennis court", "annotations": [{"polygon": [[285, 83], [286, 84], [294, 158], [84, 169], [83, 93], [269, 81]], "text": "IBAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IBAS", "recog_valid": true, "glyph_recog_text": "IBAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563601.jpg", "caption": "a pink lunch box with fruit, vegetables and a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170390.jpg", "caption": "a black and white photo of a street with writing on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039321.jpg", "caption": "a woman is standing on the sidewalk at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563617.jpg", "caption": "a group of people walking down a sidewalk with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563628.jpg", "caption": "two young boys playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170413.jpg", "caption": "a flip phone on a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039360.jpg", "caption": "a little girl eating a hot dog on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039359.jpg", "caption": "a boat is traveling down a river with a large clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563652.jpg", "caption": "a kitchen with a table and chairs in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301508.jpg", "caption": "a narrow street with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563658.jpg", "caption": "a white truck with a box on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170451.jpg", "caption": "four fighter jets flying in formation with smoke coming out of their tails", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170464.jpg", "caption": "two women holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039434.jpg", "caption": "a man is standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563725.jpg", "caption": "a clock on a pole in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170512.jpg", "caption": "a pilot boat and deer in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301591.jpg", "caption": "a large hamburger on a plate with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563743.jpg", "caption": "a girl with blue hair holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301602.jpg", "caption": "three police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039464.jpg", "caption": "a sign on a pole with a pizza express sign", "annotations": [{"polygon": [[97, 312], [96, 345], [181, 345], [180, 312]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PiZZA", "recog_valid": false, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[93, 353], [92, 387], [178, 386], [178, 355]], "text": "SLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SLICE", "recog_valid": true, "glyph_recog_text": "SLICE", "glyph_recog_ld": 1.0}, {"polygon": [[100, 393], [88, 430], [178, 429], [179, 394]], "text": "EXPRESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "EXPRESS", "recog_valid": true, "glyph_recog_text": "EXPRESS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170540.jpg", "caption": "a black and white photo of people riding bikes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432686.jpg", "caption": "a cat is sitting on top of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301615.jpg", "caption": "a group of people on a beach holding up a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039497.jpg", "caption": "a bus with a large red and white stripe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563791.jpg", "caption": "a group of people skiing on a snow covered surface", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301649.jpg", "caption": "a woman feeding a cake to another woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432727.jpg", "caption": "a boy in an orange shirt holding a remote control", "annotations": [{"polygon": [[218, 294], [229, 288], [240, 285], [252, 285], [268, 290], [287, 301], [299, 313], [291, 340], [278, 329], [265, 320], [255, 315], [243, 312], [226, 316], [217, 324]], "text": "AUBURN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "pPEURy", "recog_valid": false, "glyph_recog_text": "AUBURN", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432742.jpg", "caption": "two men shaking hands in front of a large window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039528.jpg", "caption": "two baseball players jumping for joy after hitting a home run", "annotations": [{"polygon": [[376, 111], [393, 131], [401, 133], [419, 126], [416, 119], [411, 108], [407, 98], [401, 96], [380, 106]], "text": "66", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "68", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039542.jpg", "caption": "a fire truck spraying water on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432770.jpg", "caption": "a person laying on a bed with their legs up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170636.jpg", "caption": "police motorcycles are driving down the field at a baseball game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301712.jpg", "caption": "a group of men standing around a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432806.jpg", "caption": "a woman sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301747.jpg", "caption": "two polar bears walking in the dirt near a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301749.jpg", "caption": "a green fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563898.jpg", "caption": "a kitchen with white cabinets and stainless steel appliances", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170683.jpg", "caption": "a parking lot with a lot of cars and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301769.jpg", "caption": "a street light with traffic lights on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039632.jpg", "caption": "a street with a no parking sign and a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432849.jpg", "caption": "a train engine with a shadow on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432873.jpg", "caption": "a tennis player on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563948.jpg", "caption": "a steam engine train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432877.jpg", "caption": "a box of smart ones pasta with broccoli and chicken", "annotations": [{"polygon": [[96, 135], [66, 154], [58, 174], [60, 191], [73, 197], [94, 197], [112, 187], [212, 190], [216, 150]], "text": "Ones", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ones", "recog_valid": true, "glyph_recog_text": "Ones", "glyph_recog_ld": 1.0}, {"polygon": [[79, 72], [64, 92], [45, 128], [53, 138], [69, 141], [85, 139], [92, 133], [240, 138], [246, 93], [97, 76]], "text": "Smart", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Smart", "recog_valid": true, "glyph_recog_text": "Smart", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432896.jpg", "caption": "cows on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432909.jpg", "caption": "a red fire hydrant is behind a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563986.jpg", "caption": "a stop sign with a street sign on it", "annotations": [{"polygon": [[152, 219], [139, 279], [139, 282], [159, 285], [279, 281], [307, 278], [313, 227], [309, 207], [248, 211], [199, 211], [166, 214]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[261, 108], [259, 133], [400, 122], [398, 101], [337, 107], [288, 111], [277, 110], [273, 106]], "text": "Reservoir", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Reservoir", "recog_valid": true, "glyph_recog_text": "Reservoir", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170780.jpg", "caption": "a suitcase with clothes and shoes inside", "annotations": [{"polygon": [[203, 274], [227, 360], [220, 362], [196, 275]], "text": "REVOLUTIONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REVOLUTIONS", "recog_valid": true, "glyph_recog_text": "sEy91vt.chs", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432924.jpg", "caption": "a woman taking a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039711.jpg", "caption": "a stop sign with the words can stop splunk written on it", "annotations": [{"polygon": [[244, 193], [244, 193], [367, 190], [367, 190], [371, 150], [249, 157], [243, 193]], "text": "CAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAN", "recog_valid": true, "glyph_recog_text": "CAN", "glyph_recog_ld": 1.0}, {"polygon": [[210, 312], [210, 312], [471, 315], [470, 199], [214, 200], [210, 311]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[281, 374], [281, 374], [305, 364], [375, 364], [413, 379], [415, 382], [448, 351], [395, 321], [343, 321], [299, 324], [252, 328], [281, 374]], "text": "SPUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPUNN", "recog_valid": false, "glyph_recog_text": "SPUR", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301855.jpg", "caption": "a group of people standing in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170785.jpg", "caption": "people standing in line at a food truck", "annotations": [{"polygon": [[460, 131], [458, 165], [481, 163], [512, 158], [511, 130]], "text": "Wh", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Wh", "recog_valid": true, "glyph_recog_text": "Wh", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432932.jpg", "caption": "a yellow bus on a road with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301875.jpg", "caption": "a stop sign with a sign that says eating animals", "annotations": [{"polygon": [[144, 226], [307, 227], [306, 269], [140, 269]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039733.jpg", "caption": "a school bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564031.jpg", "caption": "a train with a yellow and black engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564050.jpg", "caption": "a display of carrots and celery in a grocery store", "annotations": [{"polygon": [[56, 344], [67, 339], [90, 375], [77, 380]], "text": "CARROTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CARROTS", "recog_valid": true, "glyph_recog_text": "CARROT", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039764.jpg", "caption": "two children playing with a tennis racket", "annotations": [{"polygon": [[416, 102], [416, 136], [442, 152], [459, 150], [460, 124], [442, 105]], "text": "76", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "7s", "recog_valid": false, "glyph_recog_text": "7", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432981.jpg", "caption": "a man on a skateboard doing a trick", "annotations": [{"polygon": [[151, 380], [181, 373], [193, 405], [160, 412], [160, 412]], "text": "KA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "李", "recog_valid": false, "glyph_recog_text": "至", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301918.jpg", "caption": "two old biplanes are parked in a hangar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564063.jpg", "caption": "a man in uniform is driving an old military truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039779.jpg", "caption": "a city street at night with many lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564072.jpg", "caption": "a red bus is parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564074.jpg", "caption": "a street with a fire hydrant and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039802.jpg", "caption": "a man wearing a black shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433021.jpg", "caption": "titans way victory lane", "annotations": [{"polygon": [[227, 93], [334, 125], [325, 160], [228, 134]], "text": "VICTORY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VICTORY", "recog_valid": true, "glyph_recog_text": "VICTORY", "glyph_recog_ld": 1.0}, {"polygon": [[251, 62], [326, 56], [322, 94], [264, 97], [252, 93]], "text": "TITANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TITANS", "recog_valid": true, "glyph_recog_text": "TITANS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301954.jpg", "caption": "a teddy bear wearing a red shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433046.jpg", "caption": "a clock on a brick wall", "annotations": [{"polygon": [[263, 275], [270, 266], [280, 243], [247, 245], [233, 272]], "text": "III", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "亚", "recog_valid": false, "glyph_recog_text": "lli", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170925.jpg", "caption": "a woman throwing a frisbee on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302000.jpg", "caption": "a green and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170931.jpg", "caption": "a red fire hydrant in the woods", "annotations": [{"polygon": [[222, 201], [218, 214], [221, 226], [229, 236], [238, 240], [250, 240], [254, 237], [251, 228], [240, 227], [230, 215], [232, 205], [226, 201]], "text": "STORZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "元品", "recog_valid": false, "glyph_recog_text": "STORZ", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[239, 188], [239, 199], [250, 200], [257, 204], [256, 223], [266, 227], [269, 221], [271, 208], [263, 194], [255, 189]], "text": "STORZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "STORZ", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433084.jpg", "caption": "a view of the wing of a small plane with a tree in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564162.jpg", "caption": "a fire extinguisher sitting on the floor next to a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433097.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[195, 179], [194, 196], [205, 203], [253, 211], [253, 201], [240, 194], [217, 187]], "text": "MAAING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAIINE", "recog_valid": false, "glyph_recog_text": "MAAING", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302029.jpg", "caption": "a bicycle with an umbrella attached to it", "annotations": [{"polygon": [[104, 294], [93, 302], [203, 401], [211, 391]], "text": "Kona", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pouor", "recog_valid": false, "glyph_recog_text": "K Bo.", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564183.jpg", "caption": "a baseball player is swinging a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170968.jpg", "caption": "a black and red suitcase", "annotations": [{"polygon": [[292, 321], [289, 324], [322, 357], [324, 354]], "text": "Oceanic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ceno", "recog_valid": false, "glyph_recog_text": ". .....", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564186.jpg", "caption": "a bus accident in the city with a car and a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564187.jpg", "caption": "a busy street with people walking and standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433122.jpg", "caption": "a baby is brushing his teeth with a toothbrush", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302051.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564211.jpg", "caption": "a baby sleeping next to a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302078.jpg", "caption": "a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171011.jpg", "caption": "a man walking on the street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171012.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171017.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433168.jpg", "caption": "a silver refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302102.jpg", "caption": "two men standing in the snow holding snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171038.jpg", "caption": "a woman holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039971.jpg", "caption": "a yellow and black train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302116.jpg", "caption": "a skateboarder is doing a trick on a rail", "annotations": [{"polygon": [[159, 29], [156, 69], [261, 129], [266, 101], [160, 28]], "text": "AMENITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AMENITY", "recog_valid": true, "glyph_recog_text": "AMENITY", "glyph_recog_ld": 1.0}, {"polygon": [[157, 80], [159, 115], [241, 154], [244, 127], [159, 79]], "text": "LUXURY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LUXURY", "recog_valid": true, "glyph_recog_text": "LUXURY", "glyph_recog_ld": 1.0}, {"polygon": [[67, 52], [65, 68], [124, 100], [129, 83], [69, 53]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564265.jpg", "caption": "a tennis player is about to hit the ball", "annotations": [{"polygon": [[93, 13], [215, 8], [222, 18], [220, 46], [204, 48], [169, 49], [92, 54]], "text": "SHIPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SHIPS", "recog_valid": true, "glyph_recog_text": "SHIPS", "glyph_recog_ld": 1.0}, {"polygon": [[311, 153], [333, 148], [374, 147], [420, 144], [419, 214], [371, 218], [334, 220], [304, 217], [292, 188], [300, 163]], "text": "CH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CH", "recog_valid": true, "glyph_recog_text": "CH", "glyph_recog_ld": 1.0}, {"polygon": [[119, 162], [138, 158], [190, 157], [207, 155], [234, 154], [230, 235], [190, 227], [132, 232], [108, 222], [100, 192], [106, 174]], "text": "O", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "o", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039978.jpg", "caption": "a street with traffic lights and a street light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433212.jpg", "caption": "a group of people sitting on the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302141.jpg", "caption": "a model of a large airport with a plane and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564287.jpg", "caption": "a silver train sitting on the tracks at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433221.jpg", "caption": "a table with a plate of food and a cup of coffee", "annotations": [{"polygon": [[428, 43], [423, 67], [500, 87], [506, 70]], "text": "Bakery", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Mib", "recog_valid": false, "glyph_recog_text": "Bakery", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171082.jpg", "caption": "a blue sign with the words paris avenue no through road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564302.jpg", "caption": "a home office with a desk, chair and computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433232.jpg", "caption": "a large blue truck with cheerleaders on the roof", "annotations": [{"polygon": [[373, 165], [377, 192], [417, 198], [413, 176]], "text": "SAMSUNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SAATE", "recog_valid": false, "glyph_recog_text": "SASLING", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433236.jpg", "caption": "a towel on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564314.jpg", "caption": "a black and white photo of a horse drawn carriage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564339.jpg", "caption": "a woman is skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302199.jpg", "caption": "two men are pulling a horse trailer with a rope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433274.jpg", "caption": "two people posing for a picture on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171139.jpg", "caption": "a man sitting on a couch with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433288.jpg", "caption": "a plate with food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433297.jpg", "caption": "a bus with a mural on the side driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040091.jpg", "caption": "a woman in a swimsuit is looking at the water", "annotations": [{"polygon": [[88, 263], [96, 258], [101, 254], [105, 250], [110, 247], [117, 239], [119, 232], [114, 230], [113, 234], [110, 237], [105, 242], [102, 245], [98, 249], [82, 261]], "text": "VENEZUELA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "VENEZUEU", "recog_valid": false, "glyph_recog_text": "1295516", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171180.jpg", "caption": "a group of people standing around a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040114.jpg", "caption": "a boy sitting on a window sill holding a book", "annotations": [{"polygon": [[301, 170], [302, 173], [372, 144], [369, 141]], "text": "FOOTVALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UTINATE MUUUET", "recog_valid": false, "glyph_recog_text": "t.tl.lt", "glyph_recog_ld": 7.142852040953329e-07}, {"polygon": [[302, 175], [302, 175], [311, 196], [386, 165], [376, 145]], "text": "FOOTBALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FOOTBALL", "recog_valid": true, "glyph_recog_text": "FOOTBALL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433331.jpg", "caption": "a green and red train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433336.jpg", "caption": "a man and a woman standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171194.jpg", "caption": "a boy is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564421.jpg", "caption": "a woman in blue shirt and black shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302281.jpg", "caption": "a blue and white airplane parked on the tarmac", "annotations": [{"polygon": [[292, 281], [302, 260], [335, 271], [322, 295]], "text": "Austral", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALSEE", "recog_valid": false, "glyph_recog_text": "Auttai", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433353.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040138.jpg", "caption": "a boat is in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564443.jpg", "caption": "a man is standing in a city square", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040158.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171231.jpg", "caption": "a red double decker bus", "annotations": [{"polygon": [[323, 315], [326, 327], [369, 296], [366, 287], [324, 315]], "text": "Winterfresh", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WhaterFhesh", "recog_valid": false, "glyph_recog_text": "Wanieedoesh", "glyph_recog_ld": 0.45454595041277235}, {"polygon": [[237, 218], [242, 233], [302, 210], [299, 198], [237, 219]], "text": "parrtybus Zakopane", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Zakopane", "recog_valid": false, "glyph_recog_text": "ankoue Zalopan", "glyph_recog_ld": 0.35714331632620255}, {"polygon": [[233, 206], [238, 220], [300, 194], [293, 182], [273, 186], [234, 206]], "text": "partybus", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "partybus", "recog_valid": true, "glyph_recog_text": "partybus", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433397.jpg", "caption": "a man holding a laptop with a blue background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171262.jpg", "caption": "two men sitting at a table with two dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040201.jpg", "caption": "a large airplane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433420.jpg", "caption": "a cat sitting on top of a refrigerator", "annotations": [{"polygon": [[298, 241], [298, 241], [346, 227], [348, 241], [294, 262]], "text": "Shiner", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shiner", "recog_valid": true, "glyph_recog_text": "Shiner", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171281.jpg", "caption": "a young man in blue shirt and black shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171284.jpg", "caption": "a man holding two parrots on his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171287.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171290.jpg", "caption": "a train is on the tracks with a few cars", "annotations": [{"polygon": [[339, 217], [339, 195], [417, 171], [416, 193]], "text": "PBOTECT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EU", "recog_valid": false, "glyph_recog_text": "PBOTECT", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040248.jpg", "caption": "a kitchen with a stove, oven, and shelves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171321.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[201, 333], [216, 361], [273, 346], [274, 327]], "text": "PIPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "wbei", "recog_valid": false, "glyph_recog_text": "PIPER", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564544.jpg", "caption": "a small airplane parked on the ground next to a picnic table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433472.jpg", "caption": "tasty lunch - taiwanese food", "annotations": [{"polygon": [[88, 408], [90, 441], [243, 441], [241, 432], [251, 407], [88, 407]], "text": "TASTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TASTY", "recog_valid": true, "glyph_recog_text": "TASTY", "glyph_recog_ld": 1.0}, {"polygon": [[262, 408], [264, 441], [423, 442], [421, 407], [263, 407]], "text": "LUNCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LUNCH", "recog_valid": true, "glyph_recog_text": "LUNCH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564596.jpg", "caption": "a man is doing a trick on a skateboard", "annotations": [{"polygon": [[486, 376], [461, 403], [487, 425], [505, 404], [505, 392]], "text": "fd", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "三", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171380.jpg", "caption": "a stop sign and a street sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564602.jpg", "caption": "a snowboarder is riding a rail on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564609.jpg", "caption": "an asian man riding a bicycle with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433545.jpg", "caption": "photograph lonely by josephine de la roche on 500px", "annotations": [{"polygon": [[214, 433], [214, 469], [304, 470], [304, 432]], "text": "Lonely", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Lonely", "recog_valid": true, "glyph_recog_text": "Lonely", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433561.jpg", "caption": "british airways airbus a320-214", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564643.jpg", "caption": "a cow standing on the sidewalk next to a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171430.jpg", "caption": "a train engine is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171444.jpg", "caption": "a hot dog in a paper box", "annotations": [{"polygon": [[146, 447], [152, 479], [229, 478], [240, 444]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HOT", "recog_valid": true, "glyph_recog_text": "HOT", "glyph_recog_ld": 1.0}, {"polygon": [[245, 446], [252, 484], [338, 476], [327, 445]], "text": "DOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DOC", "recog_valid": false, "glyph_recog_text": "DOG", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171478.jpg", "caption": "the planes are flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302552.jpg", "caption": "a woman pushing a bicycle with a small airplane", "annotations": [{"polygon": [[12, 397], [98, 397], [97, 436], [13, 436]], "text": "LIFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "LIFE", "recog_valid": true, "glyph_recog_text": "LIFE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040423.jpg", "caption": "a man talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433647.jpg", "caption": "a table with a lot of cakes and desserts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171536.jpg", "caption": "a group of people sitting at a table with pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433691.jpg", "caption": "a woman flying a kite in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433704.jpg", "caption": "two men standing on a field holding baseball bats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564781.jpg", "caption": "a red building with a green sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433715.jpg", "caption": "a skateboarder doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302687.jpg", "caption": "a car driving down the road with traffic lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302707.jpg", "caption": "cattle ranch and farmhouse, ca", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171637.jpg", "caption": "a delta airlines airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433786.jpg", "caption": "a cat sitting in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302725.jpg", "caption": "a train is going through a tunnel in the woods", "annotations": [{"polygon": [[160, 222], [192, 223], [193, 260], [160, 259]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433806.jpg", "caption": "a young boy holding a baseball bat in front of a tent", "annotations": [{"polygon": [[272, 332], [266, 366], [302, 373], [308, 340]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "25", "recog_valid": true, "glyph_recog_text": "25", "glyph_recog_ld": 1.0}, {"polygon": [[206, 334], [233, 327], [261, 324], [288, 323], [302, 327], [310, 330], [316, 299], [304, 294], [290, 292], [280, 290], [265, 289], [245, 290], [231, 290], [213, 291]], "text": "Burlington", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8 hington", "recog_valid": false, "glyph_recog_text": "Burlington", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171665.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040603.jpg", "caption": "four men standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564904.jpg", "caption": "a boy eating a hot dog with a bun", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564926.jpg", "caption": "rice and broccoli on a spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040658.jpg", "caption": "a fire truck is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564957.jpg", "caption": "a yellow bus on the side of the road", "annotations": [{"polygon": [[274, 214], [271, 233], [378, 255], [376, 238]], "text": "SANTIARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SANTIAPN", "recog_valid": false, "glyph_recog_text": "SANTIARD", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564969.jpg", "caption": "a white microwave with a pug inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040689.jpg", "caption": "a tow truck with a vintage car on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564978.jpg", "caption": "a laptop computer, a desktop computer, and a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433910.jpg", "caption": "a man standing in front of a display of bicycles", "annotations": [{"polygon": [[355, 164], [362, 164], [361, 157], [362, 151], [364, 144], [368, 140], [372, 137], [376, 136], [380, 136], [384, 138], [388, 143], [390, 148], [390, 154], [389, 161], [388, 165], [393, 167], [395, 161], [395, 152], [394, 144], [390, 137], [382, 129], [375, 128], [370, 128], [365, 131], [362, 135], [358, 141], [357, 148], [355, 155], [355, 160]], "text": "NORDSTJE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "人", "recog_valid": false, "glyph_recog_text": "EO", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564990.jpg", "caption": "a black computer keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302855.jpg", "caption": "a painting on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565002.jpg", "caption": "a man sitting at a table with food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040720.jpg", "caption": "a man in a blue shirt is catching a frisbee", "annotations": [{"polygon": [[249, 176], [260, 178], [266, 183], [299, 185], [314, 186], [307, 197], [284, 218], [281, 217], [243, 194], [242, 189], [245, 181]], "text": "champ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ghang\"", "recog_valid": false, "glyph_recog_text": "champ", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302869.jpg", "caption": "a suitcase filled with snacks", "annotations": [{"polygon": [[295, 90], [295, 90], [347, 74], [336, 53], [313, 53], [282, 59]], "text": "Pan", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Pon", "recog_valid": false, "glyph_recog_text": "Pan", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[303, 126], [303, 126], [391, 85], [368, 64], [286, 92], [299, 127]], "text": "Pan Stelle", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Stone", "recog_valid": false, "glyph_recog_text": "Pan Stelle", "glyph_recog_ld": 0.3000006999993}, {"polygon": [[77, 216], [78, 216], [193, 152], [177, 131], [80, 134]], "text": "Abbracci", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "Abbracci", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[367, 207], [388, 279], [503, 217], [497, 199]], "text": "Abbra'o", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Abbra'o", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433975.jpg", "caption": "two men playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565053.jpg", "caption": "a clock tower with a steeple in front of a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171837.jpg", "caption": "a man wearing a straw hat", "annotations": [{"polygon": [[201, 87], [221, 121], [329, 102], [336, 68], [259, 75], [202, 87]], "text": "PUEKTD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "YUERTD", "recog_valid": false, "glyph_recog_text": "PUEKTD", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171841.jpg", "caption": "a yellow and blue bus with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433992.jpg", "caption": "a street sign on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565081.jpg", "caption": "a young boy in an orange shirt looking at a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302940.jpg", "caption": "a surfer is in the air on a wave", "annotations": [{"polygon": [[440, 385], [440, 356], [470, 355], [470, 376]], "text": "PRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PRO", "recog_valid": true, "glyph_recog_text": "PRO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565115.jpg", "caption": "a woman is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302971.jpg", "caption": "a double decker bus is driving down a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565116.jpg", "caption": "a table with a plate of food and a newspaper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434053.jpg", "caption": "starbucks coffee mug on desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434069.jpg", "caption": "a group of people standing on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434084.jpg", "caption": "a display of apples with chinese writing on them", "annotations": [{"polygon": [[256, 183], [244, 132], [230, 132], [203, 182]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171940.jpg", "caption": "a halloween themed room with purple couches and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040891.jpg", "caption": "a red train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171967.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040901.jpg", "caption": "a man is washing his hands in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434125.jpg", "caption": "a pedestrian crossing sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565201.jpg", "caption": "a pizza with pepperoni and cheese on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434141.jpg", "caption": "a stuffed animal sitting on top of a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172000.jpg", "caption": "a couple riding a scooter down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172020.jpg", "caption": "a hand holding a cat tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303108.jpg", "caption": "a black and white photo of people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565257.jpg", "caption": "a blue wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172040.jpg", "caption": "a kitchen with a refrigerator, microwave, and a coffee maker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303126.jpg", "caption": "a little girl is holding a kite in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303133.jpg", "caption": "a large clock with a bird on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000040995.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172085.jpg", "caption": "a teddy bear wearing a hat", "annotations": [{"polygon": [[115, 411], [104, 424], [133, 450], [143, 437]], "text": "1564", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "1564", "recog_valid": true, "glyph_recog_text": "1564", "glyph_recog_ld": 1.0}, {"polygon": [[159, 448], [149, 462], [176, 479], [185, 465]], "text": "1616", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "1G16", "recog_valid": false, "glyph_recog_text": "1616", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[92, 416], [72, 436], [130, 492], [171, 504], [180, 494], [141, 471]], "text": "Shakespeare", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Shakespear", "recog_valid": false, "glyph_recog_text": "Shakespeare", "glyph_recog_ld": 0.909090991735462}, {"polygon": [[286, 246], [282, 252], [309, 271], [328, 281], [331, 277]], "text": "Shakespeare", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shakespeare", "recog_valid": true, "glyph_recog_text": "Stakrieari", "glyph_recog_ld": 0.5454549586773103}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565310.jpg", "caption": "a parking meter on a street corner", "annotations": [{"polygon": [[242, 50], [244, 60], [277, 38], [275, 26]], "text": "13208", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "13208", "recog_valid": true, "glyph_recog_text": "15305", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[291, 33], [291, 45], [326, 65], [326, 52]], "text": "13208", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "13208", "recog_valid": true, "glyph_recog_text": ":3208", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041041.jpg", "caption": "a pizza with vegetables on top of a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434262.jpg", "caption": "a double decker bus is parked next to a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565360.jpg", "caption": "two men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565365.jpg", "caption": "a basket of broccoli and carrots on display", "annotations": [{"polygon": [[202, 142], [202, 142], [208, 137], [216, 139], [218, 135], [282, 101], [292, 110], [274, 118], [253, 131], [233, 142], [222, 147], [210, 155]], "text": "BROCCOLI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Broccoli", "recog_valid": false, "glyph_recog_text": "BROCCOLI", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[230, 149], [234, 146], [238, 147], [273, 126], [279, 135], [258, 147], [233, 158]], "text": "FLOCETS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLDCETS", "recog_valid": false, "glyph_recog_text": "FLOCETS", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303227.jpg", "caption": "a horse is walking down a street with people walking behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565379.jpg", "caption": "a group of people playing a video game", "annotations": [{"polygon": [[383, 178], [425, 168], [434, 198], [393, 208]], "text": "X", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "X", "recog_valid": true, "glyph_recog_text": "X", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565387.jpg", "caption": "a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172173.jpg", "caption": "a train is pulling a tanker car down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434319.jpg", "caption": "a woman standing in a subway station looking at her cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434328.jpg", "caption": "a basket of oranges on a sidewalk", "annotations": [{"polygon": [[351, 80], [361, 74], [381, 76], [386, 82], [389, 91], [397, 91], [402, 95], [404, 109], [345, 94], [343, 91], [344, 85], [347, 82]], "text": "ehicken", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "chdken", "recog_valid": false, "glyph_recog_text": "ehicken", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[337, 144], [332, 159], [425, 175], [426, 161]], "text": "SWEETCRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SWEETCRE", "recog_valid": true, "glyph_recog_text": "SWEETCRE", "glyph_recog_ld": 1.0}, {"polygon": [[381, 112], [373, 128], [425, 143], [427, 117]], "text": "salad", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "sal永g", "recog_valid": false, "glyph_recog_text": "salad", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041138.jpg", "caption": "a train station with people walking on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434357.jpg", "caption": "a woman walking through a terminal with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565443.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565446.jpg", "caption": "a group of people walking along a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434381.jpg", "caption": "a delta airplane parked at the gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303340.jpg", "caption": "a black and white photo of a couple walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565500.jpg", "caption": "a woman in a red coat is walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172297.jpg", "caption": "a cow standing in front of a sign", "annotations": [{"polygon": [[33, 193], [35, 218], [81, 211], [80, 187]], "text": "Lunch", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Lunch", "recog_valid": true, "glyph_recog_text": "Lunch", "glyph_recog_ld": 1.0}, {"polygon": [[410, 193], [410, 232], [455, 232], [457, 202], [445, 192]], "text": "Cafe", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cafe", "recog_valid": true, "glyph_recog_text": "Cafe", "glyph_recog_ld": 1.0}, {"polygon": [[362, 193], [361, 230], [407, 233], [407, 206], [374, 193]], "text": "um", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sum", "recog_valid": false, "glyph_recog_text": "um", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[299, 169], [307, 180], [329, 150], [317, 141]], "text": "Truth", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "nogth", "recog_valid": false, "glyph_recog_text": "Truth", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041233.jpg", "caption": "mercedes-benz actros e-tron electric trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172316.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565546.jpg", "caption": "a man sitting in the driver's seat of a truck", "annotations": [{"polygon": [[257, 449], [259, 426], [445, 419], [446, 449]], "text": "CEMEX", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FEMEX", "recog_valid": false, "glyph_recog_text": "CEMEX", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303408.jpg", "caption": "a clock is on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434484.jpg", "caption": "a row of motorcycles parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041284.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303430.jpg", "caption": "a laptop computer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172361.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172369.jpg", "caption": "a man is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172370.jpg", "caption": "two people on skis and snowboards on a slope", "annotations": [{"polygon": [[295, 334], [300, 346], [234, 369], [230, 357]], "text": "BURTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UO1H08", "recog_valid": false, "glyph_recog_text": "BURTON", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[245, 339], [249, 351], [193, 372], [183, 373], [180, 362]], "text": "BURTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BURTON", "recog_valid": true, "glyph_recog_text": "BURTON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172371.jpg", "caption": "a bed in a room with a painting on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172387.jpg", "caption": "a woman in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565613.jpg", "caption": "a young girl sitting at a table with a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303484.jpg", "caption": "a man on a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041342.jpg", "caption": "a bus with its front window open at sunset", "annotations": [{"polygon": [[163, 370], [162, 390], [216, 413], [216, 399]], "text": "Qiunces's", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Quince's", "recog_valid": false, "glyph_recog_text": "Qiunces's", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041340.jpg", "caption": "a person riding a ski lift", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041377.jpg", "caption": "a train is coming down the tracks with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565675.jpg", "caption": "a soccer player is about to kick the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565680.jpg", "caption": "two men in wetsuits holding surfboards on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303546.jpg", "caption": "a doll laying on a bed with a dresser and a tv", "annotations": [{"polygon": [[441, 177], [476, 196], [474, 213], [439, 195]], "text": "LOST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LOST", "recog_valid": true, "glyph_recog_text": "LOST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303578.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172507.jpg", "caption": "a plate with a sandwich and a soda on it", "annotations": [{"polygon": [[278, 214], [280, 113], [258, 115], [258, 125], [252, 122], [240, 104], [238, 104], [242, 117], [237, 117], [242, 126], [238, 145], [242, 163], [247, 166], [250, 176], [249, 186], [251, 197], [245, 195], [239, 202], [240, 208], [252, 212], [254, 208], [278, 217]], "text": "Cocacola", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "29o9", "recog_valid": false, "glyph_recog_text": "Uouc", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303593.jpg", "caption": "a woman in a black track suit holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303608.jpg", "caption": "a double decker bus on a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172537.jpg", "caption": "a b-17 bomber-c-17-b-17-a-1-a-1-a-1-a-1-a-", "annotations": [{"polygon": [[114, 300], [117, 291], [137, 289], [192, 297], [186, 330], [167, 324], [148, 321], [134, 318], [125, 317], [122, 318], [116, 310]], "text": "UDRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORE", "recog_valid": false, "glyph_recog_text": "UDRE", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303617.jpg", "caption": "a green train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303637.jpg", "caption": "a fighter jet flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303651.jpg", "caption": "a little girl and a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172582.jpg", "caption": "a blue fire hydrant", "annotations": [{"polygon": [[196, 330], [199, 357], [245, 352], [244, 326]], "text": "599", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "599", "recog_valid": true, "glyph_recog_text": "599", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565830.jpg", "caption": "a woman is looking at a newspaper on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172615.jpg", "caption": "a train traveling down a track near a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434771.jpg", "caption": "a tray of doughnuts on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041565.jpg", "caption": "a dog standing on a table", "annotations": [{"polygon": [[123, 201], [129, 205], [155, 179], [149, 174], [140, 182], [123, 201]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EIRLHDA", "recog_valid": false, "glyph_recog_text": "HoPM5e", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172654.jpg", "caption": "a green fire hydrant and a green bucket next to it", "annotations": [{"polygon": [[323, 284], [330, 314], [357, 328], [388, 323], [415, 284], [414, 264], [391, 282], [364, 291], [340, 290]], "text": "BUTTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUTZ", "recog_valid": false, "glyph_recog_text": "BUTTS", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172655.jpg", "caption": "a small brown calf with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565876.jpg", "caption": "a man brushing his teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303765.jpg", "caption": "a traffic light with a sign has texts", "annotations": [{"polygon": [[349, 327], [354, 305], [416, 334], [412, 354]], "text": "Except", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Except", "recog_valid": true, "glyph_recog_text": "Except", "glyph_recog_ld": 1.0}, {"polygon": [[353, 333], [349, 352], [406, 377], [409, 357]], "text": "buses", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "buses", "recog_valid": true, "glyph_recog_text": "buses", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565921.jpg", "caption": "a man holding a lakers flag on top of a traffic light pole", "annotations": [{"polygon": [[188, 214], [190, 192], [246, 181], [240, 206]], "text": "ONLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONLY", "recog_valid": true, "glyph_recog_text": "ONLY", "glyph_recog_ld": 1.0}, {"polygon": [[179, 254], [180, 238], [181, 237], [250, 223], [250, 223], [251, 241]], "text": "MON THRU FRI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MON r FRI", "recog_valid": false, "glyph_recog_text": "MON THRUFRI", "glyph_recog_ld": 0.6363639669418482}, {"polygon": [[120, 141], [133, 164], [166, 154], [199, 147], [240, 146], [267, 139], [268, 129], [237, 94], [218, 102], [218, 110]], "text": "AKERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2H3XA", "recog_valid": false, "glyph_recog_text": "AKERS", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434857.jpg", "caption": "a table with food and an umbrella on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434859.jpg", "caption": "a large group of luggage in a large airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434884.jpg", "caption": "a bunch of bananas on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172743.jpg", "caption": "a bus driving down a snowy street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041678.jpg", "caption": "a person cutting a piece of cake", "annotations": [{"polygon": [[113, 230], [169, 259], [198, 184], [146, 161], [119, 168], [115, 178], [131, 181]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "11", "recog_valid": true, "glyph_recog_text": "二", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434894.jpg", "caption": "a bowl of food with shrimp and vegetables on a table", "annotations": [{"polygon": [[453, 321], [450, 328], [462, 350], [480, 366], [499, 376], [503, 370], [491, 363], [477, 355], [461, 337]], "text": "nApArk", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HAPATK", "recog_valid": false, "glyph_recog_text": "nApArK", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172776.jpg", "caption": "a man riding a skateboard in a pool", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565993.jpg", "caption": "a man on a skateboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041710.jpg", "caption": "a woman in a blue shirt and jeans standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303870.jpg", "caption": "a snowboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041729.jpg", "caption": "a hand holding a remote control in front of a display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303873.jpg", "caption": "a clock is on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172806.jpg", "caption": "a woman in yellow and blue tennis outfit", "annotations": [{"polygon": [[177, 12], [177, 51], [184, 121], [429, 101], [428, -1]], "text": "SO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SO", "recog_valid": true, "glyph_recog_text": "s o", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041739.jpg", "caption": "a blue and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041740.jpg", "caption": "a baseball player in the middle of pitching a ball", "annotations": [{"polygon": [[231, 127], [237, 162], [245, 161], [255, 167], [263, 163], [272, 148], [277, 142], [283, 141], [287, 137], [281, 131], [272, 129], [265, 128], [258, 128], [250, 125], [236, 124]], "text": "RAYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "V", "recog_valid": false, "glyph_recog_text": "RAYS", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041742.jpg", "caption": "a black truck with a ladder on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041745.jpg", "caption": "a man and a woman holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434966.jpg", "caption": "a sandwich with bacon and cheese", "annotations": [{"polygon": [[146, 302], [329, 441], [390, 441], [468, 387], [307, 295]], "text": "upper", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "保", "recog_valid": false, "glyph_recog_text": "upper", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[257, 91], [299, 138], [323, 131], [317, 84]], "text": "fresh", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "显", "recog_valid": false, "glyph_recog_text": "fresh", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041756.jpg", "caption": "a man in a yellow swimsuit is holding onto a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566043.jpg", "caption": "a man in a yellow vest is walking down a row of motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041763.jpg", "caption": "a fire hydrant with graffiti on it", "annotations": [{"polygon": [[216, 397], [215, 420], [237, 423], [266, 419], [286, 415], [284, 392], [261, 399], [240, 398]], "text": "IRAO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "哥乳人学", "recog_valid": false, "glyph_recog_text": "IRAO", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566051.jpg", "caption": "a group of people standing in front of a statue with airplanes flying overhead", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566054.jpg", "caption": "two men playing wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041772.jpg", "caption": "a large airplane sitting on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566062.jpg", "caption": "a green and red bird perched on top of a tree branch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566064.jpg", "caption": "a man wearing a black shirt", "annotations": [{"polygon": [[85, 32], [202, 18], [228, 15], [234, 47], [84, 58]], "text": "NDURA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "JDURA", "recog_valid": false, "glyph_recog_text": "NDURA", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566099.jpg", "caption": "a man sitting at a table with a large pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041838.jpg", "caption": "a person holding a remote control in front of a television", "annotations": [{"polygon": [[324, 110], [467, 139], [456, 218], [298, 201]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566126.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172923.jpg", "caption": "three bears walking along a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172925.jpg", "caption": "three women holding up their skis on a podium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566145.jpg", "caption": "three people are skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172937.jpg", "caption": "a man sitting in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304015.jpg", "caption": "a cart full of luggage", "annotations": [{"polygon": [[107, 357], [113, 372], [68, 398], [64, 382]], "text": "GME-004", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GME4004", "recog_valid": false, "glyph_recog_text": "GME-004", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435090.jpg", "caption": "a tall building with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172947.jpg", "caption": "a clock on a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435096.jpg", "caption": "a group of women riding horses in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435098.jpg", "caption": "a man in an apron standing in front of a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172957.jpg", "caption": "a baby sitting on a man's lap", "annotations": [{"polygon": [[312, 96], [305, 136], [339, 143], [362, 115]], "text": "195", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "195", "recog_valid": true, "glyph_recog_text": "195", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566175.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[386, 116], [387, 147], [438, 150], [437, 130]], "text": "Spring", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Spring", "recog_valid": true, "glyph_recog_text": "Spring", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041890.jpg", "caption": "a group of men on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304040.jpg", "caption": "a red train traveling down a track next to a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172974.jpg", "caption": "a man sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435124.jpg", "caption": "a bench and a bike by the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435142.jpg", "caption": "a small airplane flying over a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173001.jpg", "caption": "a green truck with a man sitting in the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435161.jpg", "caption": "two pictures of soccer players in action", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435163.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[126, 214], [192, 233], [190, 269], [123, 255]], "text": "BUTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUTE", "recog_valid": true, "glyph_recog_text": "BUTE", "glyph_recog_ld": 1.0}, {"polygon": [[180, 375], [263, 375], [260, 440], [176, 436]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[245, 326], [389, 319], [385, 282], [247, 288]], "text": "BOTETOURT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BOTETOURT", "recog_valid": true, "glyph_recog_text": "BOTETOURT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173032.jpg", "caption": "a little girl in a birthday hat eating cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041962.jpg", "caption": "a woman is playing tennis", "annotations": [{"polygon": [[355, 148], [390, 125], [398, 139], [361, 163]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566258.jpg", "caption": "a clock on the side of a building in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566262.jpg", "caption": "a blue train is on a hillside near a green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435210.jpg", "caption": "a desk with a computer and a coke can on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304143.jpg", "caption": "a laptop computer with a can of red bull", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042003.jpg", "caption": "a banana sitting on a stone ledge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435242.jpg", "caption": "a book with an illustration of a bird on a branch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566325.jpg", "caption": "a table with a sewing kit, scissors, and other tools", "annotations": [{"polygon": [[316, 187], [343, 197], [321, 237], [292, 230]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[335, 211], [396, 232], [388, 257], [323, 233]], "text": "YES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YES", "recog_valid": true, "glyph_recog_text": "YES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042044.jpg", "caption": "a baseball player in a red and grey uniform holding a bat", "annotations": [{"polygon": [[177, 233], [174, 289], [200, 275], [224, 262], [248, 259], [282, 259], [293, 269], [303, 237], [283, 226], [260, 223], [217, 226]], "text": "ANGELS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANGEL", "recog_valid": false, "glyph_recog_text": "ANGELS", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[278, 282], [258, 321], [258, 328], [273, 334], [280, 333], [291, 327], [297, 321], [297, 277]], "text": "46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "97", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173121.jpg", "caption": "a man in a blue shirt and green pants is skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304221.jpg", "caption": "a man sitting at a desk with a computer and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042079.jpg", "caption": "a man standing in front of a truck with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042081.jpg", "caption": "a child sitting on a table with a donut and a donut with sprinkles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304236.jpg", "caption": "a truck with an orange and white paint job", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435322.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566395.jpg", "caption": "a yellow taxi cab is stopped at a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435326.jpg", "caption": "a man riding a snowboard down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435327.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566419.jpg", "caption": "a group of people flying kites in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566432.jpg", "caption": "a donut on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435360.jpg", "caption": "a pedestrian crossing sign on a pole", "annotations": [{"polygon": [[267, 192], [274, 183], [295, 173], [299, 185], [292, 200], [273, 206], [272, 194]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}, {"polygon": [[311, 164], [374, 131], [377, 157], [312, 188]], "text": "MPH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MPH", "recog_valid": true, "glyph_recog_text": "MPH", "glyph_recog_ld": 1.0}, {"polygon": [[245, 310], [413, 249], [416, 275], [244, 334]], "text": "PEDESTRIAN CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PEDESTRIAN", "recog_valid": false, "glyph_recog_text": "PEDESTRIAN CROSSING", "glyph_recog_ld": 0.5263160387810322}, {"polygon": [[249, 339], [400, 291], [404, 318], [392, 323], [258, 362], [252, 359]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "CROSSING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042156.jpg", "caption": "a yellow train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042165.jpg", "caption": "a group of birds sitting on the ground near a hut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304319.jpg", "caption": "a cell phone sitting on a desk next to a flyer", "annotations": [{"polygon": [[209, 335], [265, 286], [272, 299], [198, 391]], "text": "ACTIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ACme", "recog_valid": false, "glyph_recog_text": "ACTIVE", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566482.jpg", "caption": "two carrots are sitting on a blue plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566488.jpg", "caption": "a man sitting on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173284.jpg", "caption": "an older couple cutting a cake in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566501.jpg", "caption": "a black and white photo of a steam train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566514.jpg", "caption": "a man and a dog standing next to a food cart", "annotations": [{"polygon": [[179, 310], [170, 323], [163, 333], [164, 342], [177, 344], [190, 343], [242, 344], [245, 337], [245, 329], [204, 323], [191, 313]], "text": "Grlotto", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Seloio", "recog_valid": false, "glyph_recog_text": "Grlotto", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435445.jpg", "caption": "a van and a truck parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304382.jpg", "caption": "a street sign with two arrows pointing to the direction of the freeway", "annotations": [{"polygon": [[230, 68], [231, 104], [260, 108], [296, 114], [300, 105], [298, 85], [291, 77], [259, 71], [236, 66]], "text": "123", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "123", "recog_valid": true, "glyph_recog_text": "123", "glyph_recog_ld": 1.0}, {"polygon": [[248, 219], [249, 262], [295, 264], [294, 223]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304384.jpg", "caption": "a knife and apple on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435456.jpg", "caption": "a man dressed as a clown with a carrot in his mouth", "annotations": [{"polygon": [[373, 1], [374, 42], [427, 39], [427, -1]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "BC", "recog_valid": false, "glyph_recog_text": "B", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566536.jpg", "caption": "a boy sitting on a couch with stickers on his laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435468.jpg", "caption": "a woman cutting grapes", "annotations": [{"polygon": [[128, 250], [133, 198], [175, 196], [173, 245]], "text": "49", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "49", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[164, 270], [164, 300], [196, 300], [197, 271]], "text": "29", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "29", "recog_valid": true, "glyph_recog_text": "29", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042254.jpg", "caption": "a sheep standing in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435482.jpg", "caption": "a bus driving down a street in the city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566575.jpg", "caption": "a pizza on a table", "annotations": [{"polygon": [[165, 156], [109, 192], [113, 196], [171, 159]], "text": "GUATEMALA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUATEMALA", "recog_valid": false, "glyph_recog_text": "rpeauh..", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566583.jpg", "caption": "a living room with a television, a laptop, and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304445.jpg", "caption": "a group of people riding bicycles next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042312.jpg", "caption": "a man holding a box of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042342.jpg", "caption": "a man sitting on the ground with his luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173422.jpg", "caption": "a baseball game with players on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173430.jpg", "caption": "a group of people walking down a street with police officers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042371.jpg", "caption": "a horse drawn carriage is parked in front of a market", "annotations": [{"polygon": [[266, 173], [266, 173], [273, 190], [346, 174], [344, 155]], "text": "PUBLIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UBIIC", "recog_valid": false, "glyph_recog_text": "PUBLIC", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[267, 197], [269, 216], [333, 208], [332, 185]], "text": "RKET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DKET", "recog_valid": false, "glyph_recog_text": "RKET", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304518.jpg", "caption": "a large airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173448.jpg", "caption": "a young boy standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304530.jpg", "caption": "a street with many people walking and sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173475.jpg", "caption": "a navy ship in the water near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435620.jpg", "caption": "a croissant sandwich with ham and eggs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042407.jpg", "caption": "a manatee rescue truck parked in front of a building", "annotations": [{"polygon": [[24, 224], [30, 226], [31, 237], [32, 247], [37, 257], [45, 262], [66, 266], [66, 272], [58, 272], [37, 264], [25, 246]], "text": "CONSERVATION", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CONSERVATIO", "recog_valid": false, "glyph_recog_text": "CXOANBEROFFHEAN", "glyph_recog_ld": 0.3333337777774814}, {"polygon": [[98, 227], [99, 238], [98, 245], [90, 256], [83, 260], [72, 264], [71, 271], [73, 271], [86, 266], [97, 259], [104, 247], [105, 227]], "text": "COMMISSION", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "COATAMISSION", "recog_valid": false, "glyph_recog_text": "CUHAPBDICN", "glyph_recog_ld": 0.33333388888842586}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435623.jpg", "caption": "graffiti on a train car has texts written on it", "annotations": [{"polygon": [[394, 280], [403, 305], [434, 299], [428, 275]], "text": "HIGH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HiGl", "recog_valid": false, "glyph_recog_text": "HIGH", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304552.jpg", "caption": "a silver train sitting at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042427.jpg", "caption": "a refrigerator with food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173506.jpg", "caption": "a baseball player in uniform standing on a field", "annotations": [{"polygon": [[141, 13], [138, 92], [353, 100], [355, 31]], "text": "Torona", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Corona", "recog_valid": false, "glyph_recog_text": "Torona", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[244, 139], [244, 193], [313, 193], [314, 142]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435653.jpg", "caption": "two goats standing in front of a cage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304590.jpg", "caption": "a large airplane with people boarding it", "annotations": [{"polygon": [[198, 250], [206, 276], [353, 280], [347, 263], [309, 258]], "text": "Ethinian", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Etaaan", "recog_valid": false, "glyph_recog_text": "Ethinian", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566737.jpg", "caption": "a person walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042458.jpg", "caption": "an old black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566752.jpg", "caption": "chanel desk with magazine, coffee, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435688.jpg", "caption": "a table with cupcakes and a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435695.jpg", "caption": "a young girl holding a baseball bat in a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566785.jpg", "caption": "a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435718.jpg", "caption": "a man in a green shirt is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173595.jpg", "caption": "a man standing next to a motorcycle with bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173596.jpg", "caption": "a fighter jet flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042529.jpg", "caption": "a red car driving down a street in front of big ben", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435750.jpg", "caption": "a traffic light with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173607.jpg", "caption": "a street sign with a chinese writing on it", "annotations": [{"polygon": [[392, 260], [393, 281], [446, 291], [443, 271]], "text": "DUMPING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DUMPING", "recog_valid": true, "glyph_recog_text": "DUMPING", "glyph_recog_ld": 1.0}, {"polygon": [[244, 141], [244, 141], [242, 170], [319, 188], [320, 162]], "text": "Spadina", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Spadina", "recog_valid": true, "glyph_recog_text": "Spadina", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042540.jpg", "caption": "a plate with a banana bread and a cup of coffee", "annotations": [{"polygon": [[104, 216], [116, 219], [135, 192], [128, 183]], "text": "Just", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "30-2", "recog_valid": false, "glyph_recog_text": "lust", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[159, 177], [140, 207], [134, 223], [102, 229], [70, 238], [112, 240], [144, 232], [160, 211], [169, 182]], "text": "Just Do", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Just Do", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566839.jpg", "caption": "a television screen showing a horse race", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435764.jpg", "caption": "a red truck is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173632.jpg", "caption": "a young girl on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435777.jpg", "caption": "citation xc-300 - c-300 - c-300 - c-300 - c-300 - c-300 - c-300 - c", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304716.jpg", "caption": "a laptop with a spoon and yogurt next to it", "annotations": [{"polygon": [[372, 303], [366, 332], [385, 334], [406, 333], [426, 326], [440, 318], [447, 311], [452, 286], [443, 291], [435, 299], [421, 303], [409, 305], [396, 305], [385, 303], [378, 303]], "text": "Dani", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Dmi", "recog_valid": false, "glyph_recog_text": "Dani", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435814.jpg", "caption": "two men signing papers at a table with a green wall behind them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304746.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173682.jpg", "caption": "a baseball player pitching a ball on a field", "annotations": [{"polygon": [[101, 156], [110, 154], [121, 155], [131, 156], [146, 159], [152, 160], [173, 161], [179, 167], [187, 160], [189, 148], [189, 146], [161, 147], [160, 140], [154, 139], [152, 143], [139, 142], [138, 136], [133, 135], [127, 139], [117, 139], [117, 133], [103, 135], [100, 147], [100, 155]], "text": "Fritoley", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fritolev", "recog_valid": false, "glyph_recog_text": "Fritoley", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173684.jpg", "caption": "a couple of bananas hanging from a metal stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304759.jpg", "caption": "a laptop computer sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566935.jpg", "caption": "a green and white fishing boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042671.jpg", "caption": "a small black horse standing in a field", "annotations": [{"polygon": [[35, 227], [430, 230], [433, 276], [35, 269]], "text": "TAYTI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAYTT AUTA", "recog_valid": false, "glyph_recog_text": "T A YTI", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042675.jpg", "caption": "a cow standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435897.jpg", "caption": "a horse and carriage ride down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042682.jpg", "caption": "a busy city street at night with traffic lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042690.jpg", "caption": "two motorcycles parked next to each other on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042698.jpg", "caption": "a room with a television, a computer, and a camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566988.jpg", "caption": "a person holding a glass of wine and a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435920.jpg", "caption": "a pizza on a table with a knife and fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042705.jpg", "caption": "a man on a scooter is stopped at a crosswalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435929.jpg", "caption": "a police car is parked at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435933.jpg", "caption": "a red fire hydrant in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304866.jpg", "caption": "a group of people standing near an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173801.jpg", "caption": "a man in a tie and shirt standing in front of a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567018.jpg", "caption": "two glazed donuts on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435951.jpg", "caption": "four men standing on a tennis court holding tennis rackets", "annotations": [{"polygon": [[148, 242], [184, 254], [175, 280], [143, 273]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[214, 269], [255, 279], [243, 314], [208, 304]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": true, "glyph_recog_text": "W", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042740.jpg", "caption": "a bowl of vegetables in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042751.jpg", "caption": "a clock made out of books on a shelf", "annotations": [{"polygon": [[283, 145], [238, 144], [239, 109], [283, 110]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304921.jpg", "caption": "a woman in a red jacket is standing on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173856.jpg", "caption": "a desk with a keyboard, a camera, a book, and a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436014.jpg", "caption": "a man sitting at a table with two cats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042804.jpg", "caption": "a group of people sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173876.jpg", "caption": "a double decker bus parked on a cobblestone street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304950.jpg", "caption": "a laptop computer, a purse, a wallet, a cell phone, a camera, a passport, a passport holder, a passport cover, a passport card,", "annotations": [{"polygon": [[117, 386], [128, 389], [154, 327], [142, 322]], "text": "GETTING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GETTING", "recog_valid": true, "glyph_recog_text": "GETTING", "glyph_recog_ld": 1.0}, {"polygon": [[154, 333], [133, 386], [144, 390], [164, 335]], "text": "THINGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THINGS", "recog_valid": true, "glyph_recog_text": "THINOS", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304949.jpg", "caption": "a large orange truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042808.jpg", "caption": "a hand holding a carrot and a potato", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173887.jpg", "caption": "a stop sign on a sidewalk with a bus in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567106.jpg", "caption": "a dog sitting at a table with a sandwich on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173893.jpg", "caption": "a man and a woman holding a coke and a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436048.jpg", "caption": "a sign on a pole", "annotations": [{"polygon": [[164, 242], [163, 283], [267, 271], [270, 245]], "text": "RACRIKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAORKS", "recog_valid": false, "glyph_recog_text": "RACRIKS", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[108, 246], [108, 282], [153, 268], [152, 243]], "text": "OEO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OEO", "recog_valid": true, "glyph_recog_text": "OEO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436070.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304997.jpg", "caption": "a baseball game is being played at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042853.jpg", "caption": "a cake with candles on it", "annotations": [{"polygon": [[354, 228], [346, 108], [277, 113], [272, 270]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "39", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042862.jpg", "caption": "a man and woman sitting on a bench in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042865.jpg", "caption": "a black cat on a bookshelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436085.jpg", "caption": "an old black and white photo of a train on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173948.jpg", "caption": "a man is playing tennis", "annotations": [{"polygon": [[350, 305], [423, 288], [423, 320], [356, 336]], "text": "ADE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ADEI", "recog_valid": false, "glyph_recog_text": "ADE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173978.jpg", "caption": "a refrigerator with a bunch of stuff on it", "annotations": [{"polygon": [[228, 430], [267, 411], [260, 398], [221, 418]], "text": "ATACAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ATACAMA", "recog_valid": true, "glyph_recog_text": "ATACAMA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305076.jpg", "caption": "two hummingbirds flying in the air near each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174015.jpg", "caption": "cupcakes under cover", "annotations": [{"polygon": [[79, 99], [171, 111], [179, 95], [185, 100], [185, 112], [215, 104], [217, 130], [133, 132], [132, 148], [119, 143], [120, 129], [74, 131]], "text": "Cupcakes", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Gwpeahes", "recog_valid": false, "glyph_recog_text": "Cupcakes", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[222, 102], [290, 97], [293, 109], [308, 104], [322, 129], [224, 131]], "text": "under", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "undet", "recog_valid": false, "glyph_recog_text": "under", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436161.jpg", "caption": "a piece of cake on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567234.jpg", "caption": "a stop sign and a one way sign on a brick building", "annotations": [{"polygon": [[95, 199], [95, 199], [98, 224], [146, 222], [155, 194]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174026.jpg", "caption": "a brick building with a wooden bench and a wooden barrel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174028.jpg", "caption": "a boy sitting on a wooden bench with a girl sitting on his lap", "annotations": [{"polygon": [[444, 166], [444, 166], [490, 166], [488, 140], [468, 133], [448, 138], [442, 162]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "KQ", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[388, 150], [398, 146], [399, 128], [393, 114], [383, 112], [369, 118], [367, 133], [370, 147], [386, 152]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "D", "recog_valid": false, "glyph_recog_text": "片", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[317, 112], [309, 127], [295, 130], [282, 124], [276, 114], [288, 117], [288, 91], [306, 91], [304, 110], [307, 120], [315, 108]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[95, 90], [95, 90], [95, 95], [87, 95], [73, 103], [75, 123], [67, 132], [56, 124], [56, 121], [58, 120], [68, 125], [71, 119], [70, 105], [57, 98], [54, 95], [50, 95], [47, 91], [56, 89], [70, 98], [82, 91], [87, 89]], "text": "J", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "J", "recog_valid": true, "glyph_recog_text": "J", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042960.jpg", "caption": "dale sign with a statue of a man on a ladder", "annotations": [{"polygon": [[234, 235], [269, 234], [269, 189], [235, 192]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305106.jpg", "caption": "two women are singing in front of a microphone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567252.jpg", "caption": "two boats are sitting on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042975.jpg", "caption": "a cat sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174048.jpg", "caption": "a pizza on a pink plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567274.jpg", "caption": "a woman standing in front of a fruit stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567278.jpg", "caption": "a white cat sitting on top of a pizza box", "annotations": [{"polygon": [[114, 378], [107, 355], [228, 336], [242, 358], [205, 367], [153, 376], [115, 378]], "text": "HUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cn", "recog_valid": false, "glyph_recog_text": "H U T", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436221.jpg", "caption": "a young boy holding a piece of paper in front of a hut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305156.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[225, 144], [271, 151], [299, 156], [332, 176], [342, 189], [317, 204], [286, 182], [254, 174], [228, 164]], "text": "GIANTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIANTS", "recog_valid": false, "glyph_recog_text": "GIANTS", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567301.jpg", "caption": "a man and a woman standing in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043016.jpg", "caption": "air canada airbus a320-214-2-cw", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043033.jpg", "caption": "a person holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567332.jpg", "caption": "a parking meter with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567357.jpg", "caption": "a woman in pink tennis outfit swinging a tennis racket", "annotations": [{"polygon": [[86, 145], [85, 175], [161, 175], [163, 141]], "text": "ergia", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ergia", "recog_valid": true, "glyph_recog_text": "ergia", "glyph_recog_ld": 1.0}, {"polygon": [[85, 23], [86, 119], [347, 122], [346, 27]], "text": "AND", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "anD", "recog_valid": false, "glyph_recog_text": "AND", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436287.jpg", "caption": "a large building with a clock tower on top", "annotations": [{"polygon": [[363, 425], [363, 425], [364, 441], [424, 440], [436, 450], [442, 446], [442, 440], [491, 446], [507, 438], [494, 418], [483, 413], [474, 426], [418, 418], [409, 415], [389, 418], [379, 412], [371, 422]], "text": "thinkingmedia", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ekiri. kiongwedla", "recog_valid": false, "glyph_recog_text": "hinkingmedia", "glyph_recog_ld": 0.4705885467126195}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567372.jpg", "caption": "a train parked next to a building with a castle in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043093.jpg", "caption": "banana and oats with granola and vanilla", "annotations": [{"polygon": [[289, 157], [280, 119], [304, 116], [397, 114], [449, 121], [449, 164], [415, 158], [340, 153], [312, 151]], "text": "Stonyfiel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Stonyfie", "recog_valid": false, "glyph_recog_text": "Stonyfiel", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174167.jpg", "caption": "a street at night with cars driving by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567384.jpg", "caption": "a person riding a surfboard on a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567396.jpg", "caption": "a group of people riding horses down a cobblestone street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305254.jpg", "caption": "a black and white photo of a taxi cab on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305256.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436333.jpg", "caption": "a group of people standing around a table with bags of toys", "annotations": [{"polygon": [[216, 392], [237, 382], [247, 377], [256, 368], [263, 359], [272, 348], [282, 360], [275, 369], [266, 381], [256, 391], [246, 398], [238, 401], [228, 404]], "text": "WINNER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VIAAEH", "recog_valid": false, "glyph_recog_text": "WINNER", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305259.jpg", "caption": "a man walking down the street in front of a store with a sign that says kings sky canal st", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567407.jpg", "caption": "a silver flip phone next to a black flip phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305283.jpg", "caption": "a small white dog is standing next to a bicycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567430.jpg", "caption": "a large jetliner on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436362.jpg", "caption": "a group of sailboats floating on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436370.jpg", "caption": "a man sitting on the snow with a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174228.jpg", "caption": "a woman sitting on a bench with a dog", "annotations": [{"polygon": [[13, 124], [19, 117], [38, 120], [38, 128], [210, 151], [211, 173], [35, 158], [14, 159]], "text": "SHAKESPEARE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SHAKESPEARE", "recog_valid": true, "glyph_recog_text": "SHAKESPEARE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436383.jpg", "caption": "a display case with a variety of pastries and a christmas tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436394.jpg", "caption": "a man in an orange shirt is about to hit a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436406.jpg", "caption": "a bus with a giant rat on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043191.jpg", "caption": "a person holding up a book about national train day", "annotations": [{"polygon": [[72, 155], [73, 177], [240, 169], [240, 165], [234, 165], [233, 165], [232, 147]], "text": "NATIONAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NATIONAL", "recog_valid": true, "glyph_recog_text": "NATIONAL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174263.jpg", "caption": "a shoe is hanging from a stop sign", "annotations": [{"polygon": [[173, 166], [232, 179], [262, 189], [345, 184], [345, 184], [368, 231], [324, 240], [276, 241], [237, 235], [166, 225], [167, 220]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043190.jpg", "caption": "a refrigerator with a lot of stuff on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174303.jpg", "caption": "a young boy sitting on a couch with a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436453.jpg", "caption": "a woman reading to two children in bed", "annotations": [{"polygon": [[114, 268], [128, 279], [145, 293], [173, 307], [167, 314], [139, 302], [118, 286], [111, 282]], "text": "Mathey", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Moda", "recog_valid": false, "glyph_recog_text": "Mathey", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436455.jpg", "caption": "a man brushing his teeth in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305384.jpg", "caption": "a detour sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436464.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[201, 103], [204, 129], [240, 116], [248, 87]], "text": "City", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "City", "recog_valid": true, "glyph_recog_text": "City", "glyph_recog_ld": 1.0}, {"polygon": [[200, 131], [320, 75], [322, 96], [200, 155]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Milwaukee", "recog_valid": false, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[164, 172], [163, 193], [200, 178], [200, 157]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[205, 151], [205, 174], [323, 116], [319, 97]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[167, 259], [167, 264], [171, 266], [233, 240], [233, 229]], "text": "EMERGENGY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MERGENCY", "recog_valid": false, "glyph_recog_text": "SMERGENSY", "glyph_recog_ld": 0.7777780246910837}, {"polygon": [[238, 227], [238, 237], [320, 201], [320, 190]], "text": "REGULATIONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REGULATIONS", "recog_valid": true, "glyph_recog_text": "HEHUEATIONS", "glyph_recog_ld": 0.7272729752063862}, {"polygon": [[198, 232], [198, 241], [247, 220], [246, 210]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "自3技容中3", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[242, 196], [242, 209], [288, 188], [287, 176]], "text": "PERMIT.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PERMIT", "recog_valid": false, "glyph_recog_text": "PERMIT.", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[193, 219], [193, 231], [236, 212], [236, 200]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLICE", "recog_valid": true, "glyph_recog_text": "POLIC老", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[265, 171], [266, 182], [319, 158], [321, 144]], "text": "WITHOUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WITHOUT", "recog_valid": true, "glyph_recog_text": "WITHOUT", "glyph_recog_ld": 1.0}, {"polygon": [[214, 195], [214, 206], [260, 185], [260, 173]], "text": "STREEET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREFT", "recog_valid": false, "glyph_recog_text": "STEEET", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[261, 150], [262, 168], [321, 140], [319, 121]], "text": "6AM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6AM", "recog_valid": true, "glyph_recog_text": "6AM", "glyph_recog_ld": 1.0}, {"polygon": [[166, 196], [165, 214], [220, 188], [220, 170]], "text": "2AM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2AM", "recog_valid": true, "glyph_recog_text": "2AM", "glyph_recog_ld": 1.0}, {"polygon": [[296, 322], [296, 357], [449, 301], [448, 264]], "text": "EDGE WOOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EDGEWOOD", "recog_valid": false, "glyph_recog_text": "EDGE WOOD", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[85, 217], [78, 250], [205, 340], [209, 306]], "text": "OAKLAND AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OAKLAND", "recog_valid": false, "glyph_recog_text": "OAKLAND AV", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305409.jpg", "caption": "a vintage bathroom with a toilet, sink and bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043269.jpg", "caption": "a road sign that is on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043270.jpg", "caption": "a display of televisions", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305422.jpg", "caption": "a mouse and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174354.jpg", "caption": "a police officer is standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174357.jpg", "caption": "a vintage car with surfboards on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305431.jpg", "caption": "a group of brown cows standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436521.jpg", "caption": "a woman is playing tennis on a court", "annotations": [{"polygon": [[97, 120], [125, 115], [121, 107], [134, 104], [133, 112], [154, 109], [158, 116], [161, 133], [155, 136], [134, 139], [138, 146], [124, 150], [126, 140], [97, 148]], "text": "NEXUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "NE*US", "recog_valid": false, "glyph_recog_text": "NEXUS", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174406.jpg", "caption": "a yellow sign with a black and white image of a wizard", "annotations": [{"polygon": [[267, 310], [278, 330], [335, 320], [330, 299]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOU", "recog_valid": true, "glyph_recog_text": "YOU", "glyph_recog_ld": 1.0}, {"polygon": [[342, 298], [346, 320], [466, 299], [466, 277]], "text": "CANNOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CANNOT", "recog_valid": true, "glyph_recog_text": "CANNOT", "glyph_recog_ld": 1.0}, {"polygon": [[338, 346], [342, 367], [419, 353], [413, 333]], "text": "PASS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PASS", "recog_valid": true, "glyph_recog_text": "PASS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567637.jpg", "caption": "a man is holding a lemon in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174440.jpg", "caption": "a man is sitting in a small boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305527.jpg", "caption": "a group of people playing frisbee on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305538.jpg", "caption": "a stop sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174470.jpg", "caption": "a group of women standing around a table with food", "annotations": [{"polygon": [[468, 91], [506, 95], [500, 134], [494, 121], [485, 123], [428, 121], [427, 106]], "text": "Cookin", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "cooki", "recog_valid": false, "glyph_recog_text": "Cookin", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174474.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[173, 127], [174, 162], [253, 115], [246, 81]], "text": "BATES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BATES", "recog_valid": true, "glyph_recog_text": "BATES", "glyph_recog_ld": 1.0}, {"polygon": [[146, 259], [138, 335], [280, 362], [285, 296]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[178, 355], [174, 389], [210, 392], [211, 350], [190, 349]], "text": "MY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NN", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[213, 358], [218, 392], [237, 400], [275, 391], [281, 378], [275, 361], [252, 359]], "text": "SPACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPAGF", "recog_valid": false, "glyph_recog_text": "SPACE", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305549.jpg", "caption": "a yellow bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436627.jpg", "caption": "two parking meters are shown in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174496.jpg", "caption": "a person is taking a piece of pizza out of a toaster", "annotations": [{"polygon": [[443, 263], [477, 176], [465, 165], [441, 233], [439, 231], [433, 233], [433, 240], [434, 251], [441, 264]], "text": "Budweiser", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Etimienn", "recog_valid": false, "glyph_recog_text": "Budweiser", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305569.jpg", "caption": "a large airplane with a space shuttle on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305573.jpg", "caption": "a group of people riding bicycles and motorcycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043431.jpg", "caption": "a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436651.jpg", "caption": "a truck and a boat are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043451.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[245, 99], [257, 95], [278, 88], [297, 77], [304, 88], [292, 96], [277, 101], [268, 105], [254, 107]], "text": "VAMONA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "yAND", "recog_valid": false, "glyph_recog_text": "VAMONA", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[186, 315], [188, 278], [233, 282], [232, 317]], "text": "DERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "celà", "recog_valid": false, "glyph_recog_text": "DERY", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174527.jpg", "caption": "a white cat sitting on a wooden bench next to a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043464.jpg", "caption": "a stop sign with a one way sign on it", "annotations": [{"polygon": [[203, 110], [303, 122], [304, 164], [198, 153]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567754.jpg", "caption": "a cat sitting in a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043477.jpg", "caption": "a kite flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043506.jpg", "caption": "a train pulling into a station with a platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043513.jpg", "caption": "a man and a woman standing in front of a motorcycle shop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567827.jpg", "caption": "a baseball player in the middle of throwing a ball", "annotations": [{"polygon": [[266, 186], [265, 203], [274, 201], [289, 198], [298, 196], [308, 193], [317, 193], [321, 174], [313, 171], [295, 176], [286, 179]], "text": "TAYLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAYLD", "recog_valid": false, "glyph_recog_text": "TAYLO", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436759.jpg", "caption": "an old black and white photo of a small train with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305690.jpg", "caption": "a dog with a purple frisbee in its mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305693.jpg", "caption": "two people on a ski lift", "annotations": [{"polygon": [[160, 399], [169, 407], [223, 436], [236, 435], [171, 398], [163, 397]], "text": "FISCHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "254254", "recog_valid": false, "glyph_recog_text": "SrE白n零E", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174623.jpg", "caption": "a kitchen with shelves and a coffee maker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567847.jpg", "caption": "a truck with graffiti on it is parked on the side of the road", "annotations": [{"polygon": [[140, 153], [216, 151], [270, 151], [269, 195], [143, 189], [145, 188], [150, 187], [141, 158]], "text": "V O T E", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOTE", "recog_valid": false, "glyph_recog_text": "VOTE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[43, 306], [55, 300], [61, 301], [70, 302], [76, 304], [82, 306], [92, 310], [94, 306], [96, 303], [100, 302], [102, 304], [103, 305], [106, 306], [108, 306], [114, 306], [118, 309], [117, 320], [121, 328], [121, 333], [112, 339], [106, 344], [98, 344], [94, 345], [89, 351], [84, 351], [81, 349], [82, 343], [81, 341], [76, 340], [73, 335], [68, 334], [63, 332], [52, 334], [47, 333], [38, 329]], "text": "CHRIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2H.g", "recog_valid": false, "glyph_recog_text": "CHRIS", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[228, 306], [229, 304], [238, 303], [242, 304], [255, 304], [278, 304], [278, 351], [227, 350]], "text": "G5G", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CH", "recog_valid": false, "glyph_recog_text": "G5G", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[326, 160], [317, 249], [325, 250], [360, 243], [380, 243], [392, 247], [413, 249], [436, 250], [439, 240], [433, 178], [378, 166], [343, 161]], "text": "HAKER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MA", "recog_valid": false, "glyph_recog_text": "HAKER", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305710.jpg", "caption": "a man wearing a hat and a woman wearing a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043566.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567870.jpg", "caption": "a sheep with a baby", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436803.jpg", "caption": "a man wearing a catcher's gear walking on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305746.jpg", "caption": "fire trucks on the street with firetrucks and firetrucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174694.jpg", "caption": "a book on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043629.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[1, 212], [1, 212], [170, 193], [168, 215], [3, 236]], "text": "boodles", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BOODLES", "recog_valid": false, "glyph_recog_text": "boodles", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174705.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305779.jpg", "caption": "a large airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174723.jpg", "caption": "a large passenger jet with colorful designs taking off", "annotations": [{"polygon": [[385, 287], [370, 283], [365, 285], [360, 281], [343, 275], [335, 273], [326, 270], [318, 268], [313, 266], [286, 260], [282, 244], [294, 248], [300, 250], [332, 260], [342, 265], [342, 262], [350, 265], [357, 267], [358, 270], [384, 276]], "text": "Fliegen", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "wcbenfliegen", "recog_valid": false, "glyph_recog_text": "Fllegen", "glyph_recog_ld": 0.41666715277737265}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043692.jpg", "caption": "a woman with a knife and a cake with a quote", "annotations": [{"polygon": [[339, 103], [336, 144], [375, 128], [371, 106]], "text": "for", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "lor", "recog_valid": false, "glyph_recog_text": "f", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[50, 4], [47, 18], [53, 36], [65, 33], [81, 33], [80, 17], [70, 6]], "text": "We", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "We", "recog_valid": true, "glyph_recog_text": "w", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[114, 348], [119, 364], [133, 361], [155, 358], [168, 360], [181, 365], [203, 374], [209, 355], [191, 345], [174, 338], [140, 340]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "APP", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[232, 111], [229, 133], [313, 130], [317, 143], [326, 141], [330, 123], [326, 106]], "text": "waiting", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "waitinrf", "recog_valid": false, "glyph_recog_text": "waiting", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[92, 100], [84, 139], [115, 141], [128, 125], [126, 105]], "text": "life", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "uu", "recog_valid": false, "glyph_recog_text": "!", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[44, 104], [40, 134], [78, 128], [78, 103]], "text": "the", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "le", "recog_valid": false, "glyph_recog_text": "the", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[194, 62], [191, 93], [301, 80], [297, 56], [219, 54]], "text": "planned,", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "tlaaaned", "recog_valid": false, "glyph_recog_text": "planned,", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[46, 53], [40, 91], [69, 94], [85, 80], [84, 57]], "text": "life", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "u", "recog_valid": false, "glyph_recog_text": "life", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[196, 10], [195, 40], [274, 36], [275, 48], [287, 46], [286, 5]], "text": "willing", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "williny", "recog_valid": false, "glyph_recog_text": "willing", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174774.jpg", "caption": "a computer monitor with a cat on it", "annotations": [{"polygon": [[258, 243], [260, 256], [291, 236], [288, 222]], "text": "YOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOUR", "recog_valid": true, "glyph_recog_text": "YOUR", "glyph_recog_ld": 1.0}, {"polygon": [[239, 273], [239, 282], [292, 253], [291, 241]], "text": "ARE YOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EEHBOURS", "recog_valid": false, "glyph_recog_text": "AEYOUR", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[238, 289], [236, 307], [299, 275], [296, 254]], "text": "RAPTORS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAPTORS", "recog_valid": true, "glyph_recog_text": "RAPTORS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043704.jpg", "caption": "a man riding a horse", "annotations": [{"polygon": [[0, 247], [118, 249], [118, 280], [0, 280]], "text": "LIGHT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "UGT", "recog_valid": false, "glyph_recog_text": "LIGHT", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305858.jpg", "caption": "a woman and child walking under a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174802.jpg", "caption": "a bathroom with a toilet and a shower", "annotations": [{"polygon": [[337, 49], [345, 59], [352, 47], [359, 38], [368, 33], [387, 35], [398, 52], [400, 64], [414, 61], [411, 44], [404, 30], [391, 19], [379, 16], [366, 18], [358, 22], [349, 32]], "text": "BIOHAZARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "o。", "recog_valid": false, "glyph_recog_text": "BIOH号5ZARO", "glyph_recog_ld": 0.1000008999991}, {"polygon": [[353, 64], [359, 40], [367, 36], [387, 40], [391, 68], [397, 89], [395, 101], [379, 117], [364, 109], [346, 110], [340, 102], [348, 91], [339, 89], [339, 78]], "text": "AUTHORIZED", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "mo", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568023.jpg", "caption": "a man sitting on a motorcycle in a showroom", "annotations": [{"polygon": [[331, -1], [297, 9], [303, 36], [398, 0]], "text": "YAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "YAMAU", "recog_valid": false, "glyph_recog_text": "YAM", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174814.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174831.jpg", "caption": "a group of people riding on the back of a cow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305905.jpg", "caption": "a bench with a bunch of old suitcases on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568050.jpg", "caption": "a red double decker bus driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043773.jpg", "caption": "a man is cutting the hair of a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568062.jpg", "caption": "a white cat is sleeping on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043780.jpg", "caption": "a living room with a chair and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568068.jpg", "caption": "a man in skis is standing on the snow", "annotations": [{"polygon": [[136, 103], [147, 135], [355, 140], [364, 108], [235, 103], [209, 101]], "text": "Vancover", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "vancgever", "recog_valid": false, "glyph_recog_text": "Vancover", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043776.jpg", "caption": "an orange and black train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174869.jpg", "caption": "a white bed with a white pillow and a white pillow case", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568089.jpg", "caption": "a toilet sitting on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043809.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568113.jpg", "caption": "a kitchen with a sink, microwave, and a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568116.jpg", "caption": "a black and white photo of fruit and a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174901.jpg", "caption": "a bathroom with a sink and mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305973.jpg", "caption": "people crossing the street with umbrellas in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305980.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305985.jpg", "caption": "a bus is parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568137.jpg", "caption": "a plate of food with an egg, rice, and a banana", "annotations": [{"polygon": [[294, 2], [373, 21], [360, 40], [281, 23]], "text": "Marlboro", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Marlboro", "recog_valid": true, "glyph_recog_text": "Mariboro", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174934.jpg", "caption": "a clock tower on a building overlooking a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568155.jpg", "caption": "a woman with blue hair sitting in front of a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043876.jpg", "caption": "a shelf with various items on it", "annotations": [{"polygon": [[245, 190], [244, 235], [275, 235], [275, 191]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437102.jpg", "caption": "a cat sitting on a bookcase in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437106.jpg", "caption": "a man and a woman water skiing on a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568187.jpg", "caption": "a man sitting on a chair next to a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437120.jpg", "caption": "a yellow school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043912.jpg", "caption": "a street with cars parked on it and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568208.jpg", "caption": "a woman is standing in front of a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437138.jpg", "caption": "a stop sign and a sign has texts", "annotations": [{"polygon": [[446, 207], [446, 207], [453, 216], [465, 292], [465, 296], [330, 339], [317, 256]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437148.jpg", "caption": "a man on a boat driving through the ocean", "annotations": [{"polygon": [[31, 418], [30, 389], [16, 356], [0, 334], [0, 417]], "text": "ER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KSP", "recog_valid": false, "glyph_recog_text": "wr", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175013.jpg", "caption": "a black and white photo of a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175017.jpg", "caption": "a white truck is parked on the side of the road", "annotations": [{"polygon": [[407, 103], [443, 111], [443, 145], [407, 139]], "text": "KOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "KOO", "recog_valid": true, "glyph_recog_text": "koo", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[447, 112], [447, 112], [447, 112], [447, 145], [447, 145], [506, 156], [506, 156], [508, 129], [508, 129]], "text": "PRODUCE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PRODUCE", "recog_valid": true, "glyph_recog_text": "PRODUCE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175020.jpg", "caption": "two buses are parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306095.jpg", "caption": "a bedroom with white furniture and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175038.jpg", "caption": "a bunch of oranges in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437210.jpg", "caption": "a baseball player holding a bat in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043998.jpg", "caption": "a group of motorcycles parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437224.jpg", "caption": "a screen on an airplane with a message on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306161.jpg", "caption": "a black and white photo of a street with pigeons", "annotations": [{"polygon": [[102, 127], [99, 157], [72, 154], [76, 120], [81, 114], [96, 120]], "text": "NE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "zu", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044017.jpg", "caption": "a busy city street at night", "annotations": [{"polygon": [[463, 122], [400, 135], [405, 153], [468, 141]], "text": "NG", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PHONG", "recog_valid": false, "glyph_recog_text": "NG", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[396, 119], [472, 100], [476, 120], [401, 134]], "text": "AY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "ENeY", "recog_valid": false, "glyph_recog_text": "Ay", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[412, 95], [482, 72], [484, 91], [420, 114]], "text": "VIECHA", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MCH", "recog_valid": false, "glyph_recog_text": "VIECHA", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568311.jpg", "caption": "an old photo of a street with buses and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568330.jpg", "caption": "a panoramic view of the ocean and a ship", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175121.jpg", "caption": "a parking lot with a red bus and a blue bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175129.jpg", "caption": "india - road signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306207.jpg", "caption": "a street sign with a red and white sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175143.jpg", "caption": "a yellow train traveling through the trees", "annotations": [{"polygon": [[192, 254], [191, 269], [305, 241], [305, 228], [208, 252], [208, 250]], "text": "Measurement", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Measurement", "recog_valid": true, "glyph_recog_text": "Measurement", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437292.jpg", "caption": "a skateboarder is doing a trick in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568369.jpg", "caption": "a woman standing at a table with other people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175188.jpg", "caption": "a group of people on skateboards riding on the back of a green machine", "annotations": [{"polygon": [[125, 271], [228, 225], [224, 215], [120, 260]], "text": "MACHINES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MACHINES", "recog_valid": true, "glyph_recog_text": "MAGHINES", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[111, 277], [38, 309], [32, 297], [105, 266]], "text": "GREEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GREEN", "recog_valid": true, "glyph_recog_text": "GREEN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175193.jpg", "caption": "a street sign with several signs in chinese", "annotations": [{"polygon": [[152, 103], [150, 114], [216, 175], [221, 159]], "text": "Hollywood", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hollywood", "recog_valid": true, "glyph_recog_text": "Hollywaod", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[212, 325], [289, 297], [290, 312], [215, 342]], "text": "Escalator", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Escalator", "recog_valid": true, "glyph_recog_text": "Escalator", "glyph_recog_ld": 1.0}, {"polygon": [[94, 376], [94, 389], [133, 373], [133, 359]], "text": "Central", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Central", "recog_valid": true, "glyph_recog_text": "Canbsl", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[156, 84], [154, 100], [219, 157], [221, 142], [182, 105]], "text": "Hollywood", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "荷李活道", "recog_valid": false, "glyph_recog_text": "Hollywaoe", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044127.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[188, 372], [213, 375], [222, 322], [193, 319]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CD", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175202.jpg", "caption": "a cat looking at a bird in a cage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568428.jpg", "caption": "a man running down the street with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175238.jpg", "caption": "people are standing on the platform of a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568459.jpg", "caption": "a bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568462.jpg", "caption": "a baseball player throwing a pitch", "annotations": [{"polygon": [[313, 218], [341, 221], [332, 176], [301, 171]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GU", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[322, -1], [316, 17], [335, 29], [427, 31], [427, 0]], "text": "404", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "404", "recog_valid": true, "glyph_recog_text": "4 0 4", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175250.jpg", "caption": "a small plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175252.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044204.jpg", "caption": "a slice of pizza on a paper plate", "annotations": [{"polygon": [[187, 38], [182, 77], [186, 84], [200, 83], [232, 72], [257, 67], [284, 59], [295, 46], [281, 27], [271, 16], [221, 26]], "text": "Sal's", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Sals", "recog_valid": false, "glyph_recog_text": "Sal's", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175277.jpg", "caption": "a fire truck parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044240.jpg", "caption": "a motorcycle parked on the side of the road", "annotations": [{"polygon": [[3, 386], [10, 399], [17, 409], [24, 415], [32, 421], [39, 424], [43, 419], [34, 415], [27, 410], [21, 403], [15, 396], [10, 386]], "text": "DUNLOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "KOTNA", "recog_valid": false, "glyph_recog_text": "DUNLOP", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306393.jpg", "caption": "three people posing with an elephant and a sign that says 350", "annotations": [{"polygon": [[170, 260], [177, 387], [355, 363], [365, 239]], "text": "350", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "350", "recog_valid": true, "glyph_recog_text": "350", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044267.jpg", "caption": "a refrigerator with food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437485.jpg", "caption": "a man standing next to a sign that says welcome to zine library", "annotations": [{"polygon": [[134, 337], [223, 312], [219, 298], [124, 315]], "text": "WELCOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WELCONE", "recog_valid": false, "glyph_recog_text": "WELCOME", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[156, 396], [159, 412], [199, 399], [196, 382]], "text": "READ", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "READ", "recog_valid": true, "glyph_recog_text": "READ", "glyph_recog_ld": 1.0}, {"polygon": [[193, 337], [197, 355], [269, 337], [265, 315]], "text": "LIBRARY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIRRARY", "recog_valid": false, "glyph_recog_text": "LIBRARY", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[128, 351], [133, 374], [192, 355], [185, 336]], "text": "ZINE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZINE", "recog_valid": true, "glyph_recog_text": "ZINE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044277.jpg", "caption": "a dirty oven with food on the bottom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568566.jpg", "caption": "a baseball player in uniform holding a bat", "annotations": [{"polygon": [[227, 203], [223, 235], [301, 231], [305, 206]], "text": "Tigers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aey", "recog_valid": false, "glyph_recog_text": "Tigers", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437518.jpg", "caption": "a man standing in front of a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306447.jpg", "caption": "a train is pulling into a station with a platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437520.jpg", "caption": "a man is taking a picture of himself in a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568614.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[159, 321], [357, 320], [351, 238], [160, 234]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437542.jpg", "caption": "a man and a woman eating food at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437547.jpg", "caption": "two pictures of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306485.jpg", "caption": "a desk with a laptop, a computer, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175418.jpg", "caption": "a skyservice airbus a320-214-2-bw", "annotations": [{"polygon": [[328, 246], [327, 276], [391, 272], [392, 245]], "text": "SKY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sky", "recog_valid": false, "glyph_recog_text": "SKY", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568640.jpg", "caption": "a man in an apron cooking food on a grill", "annotations": [{"polygon": [[88, 121], [137, 118], [130, 172], [87, 172]], "text": "fly", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "4", "recog_valid": false, "glyph_recog_text": "f", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175439.jpg", "caption": "a man on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568668.jpg", "caption": "a man in a costume stands next to a statue of liberty", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437597.jpg", "caption": "a street sign with a stop sign and a street sign", "annotations": [{"polygon": [[216, 304], [216, 304], [333, 223], [339, 243], [218, 325]], "text": "RIVERDALE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RIVERDALE", "recog_valid": true, "glyph_recog_text": "RIVERDALE", "glyph_recog_ld": 1.0}, {"polygon": [[249, 175], [305, 226], [301, 206], [247, 159]], "text": "LOGAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LLOGAN", "recog_valid": false, "glyph_recog_text": "LOGAN", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[390, 371], [404, 402], [431, 420], [510, 326], [511, 194]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STO", "recog_valid": false, "glyph_recog_text": "STO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044388.jpg", "caption": "a grey parrot eating a piece of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175461.jpg", "caption": "a red bus is parked on the side of the road", "annotations": [{"polygon": [[379, 234], [381, 262], [422, 265], [421, 232]], "text": "SQL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SQL", "recog_valid": true, "glyph_recog_text": "SQL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568677.jpg", "caption": "a group of soldiers cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306535.jpg", "caption": "a gas station with a traffic light and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437605.jpg", "caption": "a man in a wheelchair is being pushed by two other people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437611.jpg", "caption": "a large truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306540.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175484.jpg", "caption": "a laptop, a teddy bear, and a picture of a hello kitty", "annotations": [{"polygon": [[366, 287], [373, 309], [419, 288], [419, 273]], "text": "OREO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OREO", "recog_valid": true, "glyph_recog_text": "OREO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306557.jpg", "caption": "a bathroom with green walls and white cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306581.jpg", "caption": "a cow standing in the middle of a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306627.jpg", "caption": "a man cutting a cake with a knife", "annotations": [{"polygon": [[131, 422], [178, 438], [195, 429], [208, 408], [206, 404], [173, 391], [155, 390], [142, 397], [135, 404], [131, 413]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "oc", "recog_valid": false, "glyph_recog_text": "20", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437710.jpg", "caption": "a white cake with a heart on it", "annotations": [{"polygon": [[124, 269], [144, 276], [156, 271], [166, 249], [190, 209], [184, 202], [176, 201], [169, 211], [152, 239], [134, 239], [125, 253], [123, 269]], "text": "welcome", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "behcouss", "recog_valid": false, "glyph_recog_text": "welcome", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[173, 252], [178, 254], [184, 248], [205, 253], [215, 240], [230, 214], [233, 205], [228, 197], [220, 189], [218, 198], [212, 208], [205, 219], [197, 234], [180, 230], [178, 235]], "text": "Home", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ronw", "recog_valid": false, "glyph_recog_text": "Home", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[229, 257], [241, 260], [261, 254], [266, 258], [269, 265], [279, 266], [281, 260], [289, 225], [288, 184], [279, 178], [263, 177], [263, 181], [254, 185], [256, 199], [270, 204], [269, 219], [267, 224], [244, 222], [240, 227], [254, 232], [257, 239], [255, 247], [243, 242], [228, 248]], "text": "Sc", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H PS", "recog_valid": false, "glyph_recog_text": "os", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437714.jpg", "caption": "a zebra walking in front of a chain link fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568790.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568810.jpg", "caption": "a group of people posing for a picture on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306672.jpg", "caption": "a black and white photo of a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437745.jpg", "caption": "a man is standing next to a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175609.jpg", "caption": "a pan filled with pasta and broccoli", "annotations": [{"polygon": [[4, 459], [25, 444], [69, 469], [76, 466], [145, 502], [132, 513], [118, 515]], "text": "BergHOFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "BergHOFE", "recog_valid": false, "glyph_recog_text": "BergHOFF", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437778.jpg", "caption": "a man in a suit standing in front of a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175651.jpg", "caption": "a person holding a blue box with a white label", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437801.jpg", "caption": "a plate of pizza and a bottle of soda", "annotations": [{"polygon": [[168, 111], [183, 109], [185, 115], [209, 108], [218, 100], [222, 111], [205, 123], [184, 130], [170, 132]], "text": "Zone", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Zona", "recog_valid": false, "glyph_recog_text": "Zone", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568880.jpg", "caption": "a group of people sitting at a table", "annotations": [{"polygon": [[92, 379], [102, 413], [158, 390], [145, 353]], "text": "SF", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "SF", "recog_valid": true, "glyph_recog_text": "SF", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044596.jpg", "caption": "a television mounted on a stand in a bedroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568884.jpg", "caption": "a red tow truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568895.jpg", "caption": "a baseball player is standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044611.jpg", "caption": "a man walking a horse down a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437831.jpg", "caption": "a young boy in a baseball uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044625.jpg", "caption": "a group of people walking down the street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175699.jpg", "caption": "a large pile of apples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437849.jpg", "caption": "a kitchen with a refrigerator and a microwave in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306789.jpg", "caption": "a motorcycle parked next to a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306791.jpg", "caption": "a table topped with plates of food and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306800.jpg", "caption": "a man playing guitar on a chair next to a police officer on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568948.jpg", "caption": "a woman standing next to a refrigerator with a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437892.jpg", "caption": "a double decker bus parked under a bridge", "annotations": [{"polygon": [[130, 235], [130, 235], [170, 232], [168, 201], [130, 204], [122, 212], [122, 220]], "text": "92", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "92", "recog_valid": true, "glyph_recog_text": "92", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437904.jpg", "caption": "a plate with a hot dog and french fries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437911.jpg", "caption": "a woman standing in the middle of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044704.jpg", "caption": "a blender with a jar of red liquid sitting on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306866.jpg", "caption": "a row of parked bicycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306877.jpg", "caption": "a train on the tracks", "annotations": [{"polygon": [[195, 251], [195, 227], [235, 219], [236, 245]], "text": "BNSE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PNSE", "recog_valid": false, "glyph_recog_text": "BNSE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437949.jpg", "caption": "police motorcycle with a child on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044739.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044751.jpg", "caption": "a group of people sitting around a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175830.jpg", "caption": "a cat sitting on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569050.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044763.jpg", "caption": "a newspaper with a toast and tomato sauce on it", "annotations": [{"polygon": [[80, 230], [176, 207], [171, 193], [116, 207], [76, 217]], "text": "Prudenntial", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Prudential", "recog_valid": false, "glyph_recog_text": "Prudenntial", "glyph_recog_ld": 0.909090991735462}, {"polygon": [[42, 178], [45, 201], [89, 191], [173, 172], [180, 175], [172, 153], [162, 150], [147, 152]], "text": "FINANC", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "FINANC", "recog_valid": true, "glyph_recog_text": "FINANC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437996.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044781.jpg", "caption": "a woman is standing next to a pile of apples", "annotations": [{"polygon": [[402, 250], [387, 327], [362, 324], [382, 245]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "—S", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[415, 255], [411, 285], [438, 289], [441, 256]], "text": "80", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "80", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044788.jpg", "caption": "a large group of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569079.jpg", "caption": "a dump truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044792.jpg", "caption": "a clock on a post with a sign that says new buffalo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569088.jpg", "caption": "a kitchen counter with a bowl of food and a bowl of milk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438028.jpg", "caption": "a person on skis standing on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044814.jpg", "caption": "a dog sitting in a canoe", "annotations": [{"polygon": [[12, 383], [86, 371], [83, 353], [11, 364]], "text": "Puranha", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "puranhq", "recog_valid": false, "glyph_recog_text": "Puranha", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044820.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438047.jpg", "caption": "an old black and white photo of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438055.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306990.jpg", "caption": "a male tennis player is celebrating after winning a point", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306992.jpg", "caption": "a man sitting on the bow of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175925.jpg", "caption": "a man is standing next to a dog crate", "annotations": [{"polygon": [[206, 252], [238, 271], [238, 282], [205, 265]], "text": "TUAPA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TUAPA", "recog_valid": true, "glyph_recog_text": "TUAFA", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306998.jpg", "caption": "a row of trucks parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175943.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175946.jpg", "caption": "a group of people are petting a cow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175951.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307025.jpg", "caption": "a microwave oven on a shelf next to a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438101.jpg", "caption": "a breakfast plate with an omelet and toast", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569177.jpg", "caption": "a woman sitting on a wall next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307033.jpg", "caption": "a room with a lot of luggage and a wheel chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307039.jpg", "caption": "a green blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569205.jpg", "caption": "three planes flying over a mountain range with parachutes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438145.jpg", "caption": "a boy throwing a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044946.jpg", "caption": "a group of people sitting around a table with a pizza", "annotations": [{"polygon": [[82, 320], [92, 311], [97, 307], [106, 302], [115, 300], [122, 295], [123, 316], [112, 319], [102, 321], [99, 326], [93, 332], [94, 341], [90, 346], [84, 347], [81, 344], [80, 337], [80, 327]], "text": "GUINNESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GUNEr", "recog_valid": false, "glyph_recog_text": "GUINNESS", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176029.jpg", "caption": "a bus driving down a street with people on it", "annotations": [{"polygon": [[205, 105], [203, 134], [297, 144], [294, 117]], "text": "CENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CENTED", "recog_valid": false, "glyph_recog_text": "CENTER", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044962.jpg", "caption": "an old truck with a camper on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176040.jpg", "caption": "a person on a dirt bike doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307124.jpg", "caption": "two twin beds in a room with a lamp and a stuffed animal", "annotations": [{"polygon": [[9, 415], [10, 366], [94, 370], [96, 418]], "text": "Michael", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "MoME", "recog_valid": false, "glyph_recog_text": "Michael", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[109, 369], [108, 422], [193, 421], [192, 371]], "text": "PHOTOGRAPHY BY Hanscom", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307136.jpg", "caption": "a woman walking down a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044996.jpg", "caption": "organic uncured beef hot dogs", "annotations": [{"polygon": [[150, 135], [139, 171], [265, 168], [262, 142]], "text": "Applegate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Applegate", "recog_valid": true, "glyph_recog_text": "Applegate", "glyph_recog_ld": 1.0}, {"polygon": [[35, 174], [42, 210], [234, 210], [228, 174]], "text": "Organic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Organic", "recog_valid": true, "glyph_recog_text": "Organic", "glyph_recog_ld": 1.0}, {"polygon": [[235, 171], [440, 165], [438, 194], [244, 201]], "text": "Uncured", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Uncured", "recog_valid": true, "glyph_recog_text": "Uncured", "glyph_recog_ld": 1.0}, {"polygon": [[184, 207], [185, 238], [278, 234], [274, 204]], "text": "Hot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hot", "recog_valid": true, "glyph_recog_text": "Hot", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307148.jpg", "caption": "a female tennis player in green and white uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176087.jpg", "caption": "a white truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176091.jpg", "caption": "a woman walking towards a plane on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569308.jpg", "caption": "a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438252.jpg", "caption": "a laptop and a monitor on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307182.jpg", "caption": "a large white truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307196.jpg", "caption": "a man on a skateboard doing a trick on a ledge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045084.jpg", "caption": "a group of people sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045089.jpg", "caption": "three police officers riding horses down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438333.jpg", "caption": "two men talking on cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307264.jpg", "caption": "a street sign with a white and blue arrow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307266.jpg", "caption": "a baseball player in a maroon and white uniform pitching a ball", "annotations": [{"polygon": [[75, 255], [74, 284], [91, 285], [114, 300], [112, 287], [160, 287], [159, 256]], "text": "IKON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IKON", "recog_valid": true, "glyph_recog_text": "IKON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438341.jpg", "caption": "a parking meter with a yellow face on it", "annotations": [{"polygon": [[206, 163], [230, 203], [260, 191], [260, 174], [252, 163], [234, 158], [227, 158]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "D", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307270.jpg", "caption": "two double decker buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307272.jpg", "caption": "a yellow fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176210.jpg", "caption": "a clock tower in front of a shopping center", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438368.jpg", "caption": "people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438375.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[264, 253], [258, 229], [261, 216], [290, 202], [294, 208], [298, 241]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307318.jpg", "caption": "three surfers standing on the beach with their boards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307337.jpg", "caption": "a woman sitting in a chair looking out a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176265.jpg", "caption": "two men in white shirts standing next to a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569495.jpg", "caption": "a red fire hydrant sitting in front of a brick building", "annotations": [{"polygon": [[318, 110], [289, 165], [380, 219], [410, 155]], "text": "HYD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HYD", "recog_valid": true, "glyph_recog_text": "HYD", "glyph_recog_ld": 1.0}, {"polygon": [[415, 180], [489, 227], [472, 260], [396, 226]], "text": "HYDRANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HYDRAHT", "recog_valid": false, "glyph_recog_text": "HYDORANT", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176282.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176285.jpg", "caption": "two men in the driver's seat of a green train", "annotations": [{"polygon": [[399, 355], [395, 405], [436, 405], [433, 352]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "oo", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569505.jpg", "caption": "a street with a bus and a lot of trash", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176298.jpg", "caption": "two pugs wearing green hats and bow ties", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045247.jpg", "caption": "two zebras standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569539.jpg", "caption": "a cat standing on a tv watching a tv show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307426.jpg", "caption": "a man reading a book in bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438514.jpg", "caption": "a large building with a sign that says mfp", "annotations": [{"polygon": [[274, 334], [315, 326], [313, 292], [274, 301]], "text": "MI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "MI", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[317, 325], [314, 293], [342, 287], [345, 320]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": true, "glyph_recog_text": "P", "glyph_recog_ld": 1.0}, {"polygon": [[346, 320], [343, 287], [371, 283], [374, 316]], "text": "F", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LL", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569591.jpg", "caption": "a couple walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438527.jpg", "caption": "a man standing next to a suitcase and a sign", "annotations": [{"polygon": [[246, 121], [354, 106], [350, 49], [239, 53]], "text": "VILLAINS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "VMLAINS", "recog_valid": false, "glyph_recog_text": "VILLANS", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[370, 32], [383, 104], [447, 95], [447, 33], [393, 25]], "text": "VOGU", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "VOGL", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176385.jpg", "caption": "a skateboard on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569607.jpg", "caption": "a piece of cake on a plate", "annotations": [{"polygon": [[217, 184], [243, 171], [262, 166], [270, 182], [294, 174], [292, 194], [249, 198], [230, 206], [217, 204], [213, 193]], "text": "Merry", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eMan", "recog_valid": false, "glyph_recog_text": "Merry", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[301, 173], [303, 160], [311, 152], [318, 148], [328, 158], [344, 155], [358, 151], [363, 152], [385, 146], [390, 156], [375, 169], [357, 175], [341, 175], [324, 176], [315, 176], [313, 182], [306, 182], [302, 181]], "text": "Christmas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bhristimas", "recog_valid": false, "glyph_recog_text": "Christmas", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176403.jpg", "caption": "a man in a leather jacket sitting under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045335.jpg", "caption": "a street sign with a no outlet sign on it", "annotations": [{"polygon": [[200, 137], [199, 167], [282, 166], [280, 136]], "text": "SWAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SWAN", "recog_valid": true, "glyph_recog_text": "SWAN", "glyph_recog_ld": 1.0}, {"polygon": [[214, 267], [214, 295], [309, 293], [313, 264]], "text": "OUTLRT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUTLET", "recog_valid": false, "glyph_recog_text": "OUTLRT", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045337.jpg", "caption": "two men standing on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176435.jpg", "caption": "a cow with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045366.jpg", "caption": "a train on the tracks with a red and white train", "annotations": [{"polygon": [[471, 326], [513, 352], [513, 381], [467, 348]], "text": "raili", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "raie", "recog_valid": false, "glyph_recog_text": "rail", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438594.jpg", "caption": "brussels sprouts at the farmers market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569678.jpg", "caption": "a man kiteboarding in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569697.jpg", "caption": "two blue angels flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307554.jpg", "caption": "orpheum theater, chicago, il", "annotations": [{"polygon": [[248, 45], [253, 313], [278, 308], [271, 36]], "text": "ORPHEUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O&aTDE", "recog_valid": false, "glyph_recog_text": "Oa工E", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438629.jpg", "caption": "a person is flying a kite on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045422.jpg", "caption": "a blue side table with books and a vase on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045425.jpg", "caption": "a woman laying in bed with a pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438663.jpg", "caption": "a truck with a sticker on it that says staples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176521.jpg", "caption": "a cityscape in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569742.jpg", "caption": "a group of people sitting at a table with plates of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438671.jpg", "caption": "a blue plate with broccoli, grapes, and eggs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569747.jpg", "caption": "a narrow street with a lot of people walking down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045464.jpg", "caption": "three people posing for a picture in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045471.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[98, 328], [120, 225], [361, 235], [380, 334]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438698.jpg", "caption": "a woman walking down the street with a shopping bag", "annotations": [{"polygon": [[67, 85], [62, 190], [67, 207], [81, 216], [118, 213], [217, 213], [215, 82]], "text": "SGiFi", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "8n", "recog_valid": false, "glyph_recog_text": "SGIFi", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045491.jpg", "caption": "a man sitting on a bus with a bunch of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045492.jpg", "caption": "a group of people walking on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045494.jpg", "caption": "a truck is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176571.jpg", "caption": "people are standing in a square with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307662.jpg", "caption": "a stop sign and a sign has texts", "annotations": [{"polygon": [[258, 390], [372, 392], [381, 342], [262, 344]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045526.jpg", "caption": "a slice of pizza with corn, chicken and vegetables on it", "annotations": [{"polygon": [[7, 72], [36, 73], [67, 60], [87, 35], [97, 59], [74, 75], [47, 89], [12, 95], [8, 90]], "text": "SAMUEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "SAMUEI", "recog_valid": false, "glyph_recog_text": "SAMUEL", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[21, 107], [21, 91], [54, 81], [96, 60], [101, 81], [58, 101]], "text": "SAMUEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ADAMS", "recog_valid": false, "glyph_recog_text": "SAMUEL", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307671.jpg", "caption": "a man laying on the ground with a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045543.jpg", "caption": "a train traveling down the tracks in the desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438762.jpg", "caption": "a baby sitting in a chair with a computer mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045552.jpg", "caption": "a man in a black shirt kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438769.jpg", "caption": "a snowboarder in mid air", "annotations": [{"polygon": [[355, 196], [374, 170], [389, 194], [372, 221]], "text": "RS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RS", "recog_valid": true, "glyph_recog_text": "RS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045573.jpg", "caption": "a highway with cows grazing on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307729.jpg", "caption": "the bank of britain building is located on the corner of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569889.jpg", "caption": "a man holding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307757.jpg", "caption": "a couple walking down the street", "annotations": [{"polygon": [[272, 180], [289, 194], [350, 121], [353, 64], [312, 128], [297, 155], [286, 174], [273, 179]], "text": "BAKERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "BAKERY", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438835.jpg", "caption": "a kitchen with a sink, microwave, and a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045621.jpg", "caption": "a large truck with a large trailer on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569941.jpg", "caption": "two people walking on the beach holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045659.jpg", "caption": "a baseball player is holding a bat while another is standing behind him", "annotations": [{"polygon": [[227, 213], [242, 208], [253, 208], [270, 206], [291, 198], [294, 196], [294, 178], [281, 184], [282, 168], [266, 174], [234, 180], [217, 173], [212, 186], [218, 197]], "text": "olers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Koe", "recog_valid": false, "glyph_recog_text": "olers", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176732.jpg", "caption": "a view of the airport from the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307814.jpg", "caption": "a desk with two computers and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438899.jpg", "caption": "a coffee mug and a dog figurine on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176757.jpg", "caption": "a person riding a snowboard down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176767.jpg", "caption": "a display of teddy bears in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176790.jpg", "caption": "a young man and woman in formal attire posing for a photo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570015.jpg", "caption": "a bag of items including a flashlight, a knife, a pair of gloves, a pair of scissors, a pair of scissors, a pair of gloves, a", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438946.jpg", "caption": "a bus and a car are driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570019.jpg", "caption": "a hot dog with tomato sauce and cheese on a bun", "annotations": [{"polygon": [[56, 219], [55, 180], [92, 175], [94, 214]], "text": "JB", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "JB", "recog_valid": true, "glyph_recog_text": "J", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[68, 237], [70, 254], [87, 247], [97, 244], [106, 240], [114, 235], [113, 223]], "text": "Chileno", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Chileng", "recog_valid": false, "glyph_recog_text": "Chileno", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438978.jpg", "caption": "an air canada airplane is parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570069.jpg", "caption": "a group of men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176871.jpg", "caption": "a large stainless steel refrigerator and a smaller stainless steel refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307949.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176910.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439060.jpg", "caption": "a group of people standing around a row of motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439072.jpg", "caption": "photo - josh reddick, san francisco giants, july 4, 2015", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176946.jpg", "caption": "a red and white train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308021.jpg", "caption": "a black and white photo of people in front of a cave", "annotations": [{"polygon": [[10, 95], [10, 67], [109, 65], [108, 92], [109, 93]], "text": "43816", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "43816", "recog_valid": true, "glyph_recog_text": "43816", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439095.jpg", "caption": "a cart with a bunch of suitcases on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308028.jpg", "caption": "a cat laying on a computer keyboard", "annotations": [{"polygon": [[174, 354], [157, 372], [150, 391], [174, 402], [179, 391], [191, 380], [195, 362], [193, 357], [177, 351], [174, 354]], "text": "Rea", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eacT", "recog_valid": false, "glyph_recog_text": "eay", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308034.jpg", "caption": "a boat with a canopy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308047.jpg", "caption": "a laptop computer and a desk with a pen and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570203.jpg", "caption": "a red and white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570207.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[137, 412], [195, 392], [194, 409], [136, 428]], "text": "STOPPING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOPPING", "recog_valid": true, "glyph_recog_text": "STOPPING", "glyph_recog_ld": 1.0}, {"polygon": [[123, 407], [123, 416], [146, 403], [189, 389], [190, 378], [165, 387], [141, 396]], "text": "TOW-AWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TOWCAWAY", "recog_valid": false, "glyph_recog_text": "TOAWAY", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[232, 374], [231, 390], [268, 445], [268, 430]], "text": "ROSSLYN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "NSE", "recog_valid": false, "glyph_recog_text": "RO&ELYN", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570215.jpg", "caption": "a table with knitting supplies and a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177011.jpg", "caption": "a young boy is learning to ski", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045946.jpg", "caption": "a group of men posing for a picture with a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308100.jpg", "caption": "a train track with a train station and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439175.jpg", "caption": "a group of people sitting around a table eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570263.jpg", "caption": "a snowboarder is jumping over a ramp", "annotations": [{"polygon": [[201, 293], [297, 293], [294, 384], [199, 371]], "text": "32", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "32", "recog_valid": true, "glyph_recog_text": "32", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177050.jpg", "caption": "a man cutting into a cake with a knife", "annotations": [{"polygon": [[394, 384], [385, 387], [367, 434], [379, 438], [395, 420], [406, 392]], "text": "Arrivederci", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Mrivederc", "recog_valid": false, "glyph_recog_text": "1aapoesay", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570275.jpg", "caption": "a woman sitting at a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177060.jpg", "caption": "a bus driving down a street with a church in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439214.jpg", "caption": "a couple standing in a living room with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308145.jpg", "caption": "a man sitting at a desk using a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177078.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570297.jpg", "caption": "a red bicycle parked on a bench in a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177093.jpg", "caption": "a small vehicle on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046023.jpg", "caption": "a man and a woman standing in front of a cake", "annotations": [{"polygon": [[135, 447], [143, 462], [212, 442], [198, 422]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Orap bant", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308180.jpg", "caption": "an older couple playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570338.jpg", "caption": "a person riding a motorcycle on a road", "annotations": [{"polygon": [[196, 188], [217, 198], [220, 205], [217, 211], [211, 220], [206, 214], [197, 208], [190, 207], [181, 206], [185, 202], [189, 199], [187, 195], [188, 191], [191, 189]], "text": "22", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "22", "recog_valid": true, "glyph_recog_text": "22", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570339.jpg", "caption": "a knitted purse with a cell phone in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439268.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177125.jpg", "caption": "two police officers on horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570343.jpg", "caption": "a person skiing down a snowy slope with a fence in the background", "annotations": [{"polygon": [[331, 240], [331, 240], [326, 258], [333, 272], [388, 281], [386, 247], [340, 240]], "text": "GORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOPB", "recog_valid": false, "glyph_recog_text": "GORE", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046059.jpg", "caption": "a bicycle with a wreath on it is parked next to a stop sign", "annotations": [{"polygon": [[202, 119], [204, 91], [211, 86], [278, 92], [283, 99], [284, 107], [278, 114], [270, 132], [248, 132], [229, 130], [208, 130], [203, 125]], "text": "stop", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": false, "glyph_recog_text": "stop", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570358.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308221.jpg", "caption": "a cat laying under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046090.jpg", "caption": "a cat sitting on a desk", "annotations": [{"polygon": [[87, 300], [78, 244], [29, 246], [40, 303]], "text": "G", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "U", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177163.jpg", "caption": "a snowboarder is jumping over a tree covered hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308257.jpg", "caption": "a woman sitting on a stone ledge with her laptop", "annotations": [{"polygon": [[395, 413], [394, 376], [434, 376], [437, 411]], "text": "459", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "459", "recog_valid": true, "glyph_recog_text": "459", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439356.jpg", "caption": "a street sign with a crosswalk and a stop sign", "annotations": [{"polygon": [[273, 209], [357, 187], [356, 210], [282, 228], [281, 226], [274, 227]], "text": "LDLOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LUDLOW", "recog_valid": false, "glyph_recog_text": "LDLOW", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[181, 138], [180, 160], [232, 190], [232, 169]], "text": "HOUSTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOUSTON", "recog_valid": true, "glyph_recog_text": "HOUSTON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570430.jpg", "caption": "thunderchick boat for sale in san francisco", "annotations": [{"polygon": [[289, 345], [323, 338], [352, 333], [378, 329], [404, 325], [443, 321], [443, 339], [430, 341], [413, 343], [388, 346], [366, 350], [347, 353], [292, 363]], "text": "THUNDERCHICK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THUNDERCHICK", "recog_valid": true, "glyph_recog_text": "THUNDERCHICK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570440.jpg", "caption": "a baseball player standing on a mound of dirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439373.jpg", "caption": "a man on a motorcycle with a sign in front of him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439384.jpg", "caption": "a bicycle with books on it in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439392.jpg", "caption": "a baseball player sliding into a base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177247.jpg", "caption": "a pizza in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177253.jpg", "caption": "a large display of apples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439400.jpg", "caption": "a silver airplane with a sticker on it", "annotations": [{"polygon": [[125, 203], [168, 218], [143, 244], [117, 231]], "text": "77", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "77", "recog_valid": true, "glyph_recog_text": "77", "glyph_recog_ld": 1.0}, {"polygon": [[207, 212], [212, 230], [229, 226], [252, 226], [262, 242], [276, 227], [274, 219], [262, 209], [252, 204], [236, 204]], "text": "BOCKSCAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aslssegp", "recog_valid": false, "glyph_recog_text": "BOCKSCAR", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046189.jpg", "caption": "a person flying a kite in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308350.jpg", "caption": "a street sign in front of a small village", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046207.jpg", "caption": "a man laying in bed with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177277.jpg", "caption": "a person's hand giving a thumbs up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570503.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570512.jpg", "caption": "a man with a mohawk and a beard", "annotations": [{"polygon": [[180, 371], [200, 368], [222, 364], [244, 357], [265, 356], [289, 354], [314, 346], [330, 354], [284, 370], [245, 381], [192, 395]], "text": "COLUMBIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COLUmIR", "recog_valid": false, "glyph_recog_text": "COLUMBIA", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[61, 340], [86, 330], [114, 376], [93, 386]], "text": "Classic", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Classic", "recog_valid": true, "glyph_recog_text": "Olassic", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[1, 341], [29, 328], [64, 388], [36, 402]], "text": "LER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LER", "recog_valid": true, "glyph_recog_text": "LER", "glyph_recog_ld": 1.0}, {"polygon": [[63, 439], [56, 411], [169, 371], [181, 396]], "text": "SH", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "n:1SH", "recog_valid": false, "glyph_recog_text": "SH", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570518.jpg", "caption": "a plate with a sandwich and broccoli on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570522.jpg", "caption": "a woman eating a slice of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177319.jpg", "caption": "a yellow bus driving down a street with people on bikes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439493.jpg", "caption": "a car radio with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177349.jpg", "caption": "a statue of a cow with pink and black clothing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177353.jpg", "caption": "a group of giraffes standing next to a vehicle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570601.jpg", "caption": "a baseball game in progress with a pitcher and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570602.jpg", "caption": "a red and black boat with a christmas decoration on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308470.jpg", "caption": "a group of people sitting on the grass", "annotations": [{"polygon": [[330, 129], [334, 163], [372, 156], [367, 121]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308473.jpg", "caption": "a pair of scissors and a measuring tape on a cloth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570618.jpg", "caption": "a yellow and blue train engine sitting on the tracks", "annotations": [{"polygon": [[219, 219], [262, 220], [263, 242], [260, 253], [220, 254]], "text": "Fe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fe", "recog_valid": true, "glyph_recog_text": "Fe", "glyph_recog_ld": 1.0}, {"polygon": [[103, 216], [196, 222], [210, 231], [215, 250], [208, 254], [102, 252]], "text": "Santa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Santa", "recog_valid": true, "glyph_recog_text": "Santa", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177404.jpg", "caption": "a laptop computer sitting on a bed with a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177418.jpg", "caption": "a red and white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308496.jpg", "caption": "a man on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308527.jpg", "caption": "a herd of elephants standing in a field at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308536.jpg", "caption": "a pile of keyboards and mice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439614.jpg", "caption": "a boat with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177470.jpg", "caption": "a group of men in wheelchairs playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308548.jpg", "caption": "a bus driving down a street with a few people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046408.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308568.jpg", "caption": "a person walking past a bench and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570719.jpg", "caption": "a bus driving under a red archway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570721.jpg", "caption": "a white and pink bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308579.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[66, 230], [66, 256], [87, 286], [128, 272], [154, 264], [187, 255], [214, 262], [261, 254], [269, 244], [237, 198], [227, 193], [192, 199], [155, 200], [82, 222]], "text": "WAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KrAR", "recog_valid": false, "glyph_recog_text": "WAR", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308589.jpg", "caption": "two men standing in front of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308590.jpg", "caption": "a man doing a trick on a skateboard at a skate park", "annotations": [{"polygon": [[294, 45], [294, 74], [371, 79], [372, 47]], "text": "Packy", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Pachy", "recog_valid": false, "glyph_recog_text": "Packy", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[292, 79], [293, 108], [390, 109], [391, 81]], "text": "Fancher", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CFancher", "recog_valid": false, "glyph_recog_text": "Fancher", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570742.jpg", "caption": "a bench sitting in a field with a house in the background", "annotations": [{"polygon": [[67, 324], [70, 330], [75, 335], [77, 336], [87, 332], [132, 307], [124, 300], [112, 299], [91, 308]], "text": "Controles", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Cootrelo", "recog_valid": false, "glyph_recog_text": "Cantroles", "glyph_recog_ld": 0.44444506172770915}, {"polygon": [[55, 303], [62, 316], [114, 295], [135, 288], [129, 280], [101, 286]], "text": "correction", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Crxvetuon", "recog_valid": false, "glyph_recog_text": "comeaction", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308598.jpg", "caption": "a skier is in the air on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439676.jpg", "caption": "a snowboarder is doing a trick on a ramp", "annotations": [{"polygon": [[234, 204], [225, 206], [219, 183], [287, 80], [309, 95]], "text": "custom", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oubtem", "recog_valid": false, "glyph_recog_text": "custom", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308606.jpg", "caption": "a man is standing next to a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177538.jpg", "caption": "a woman in a red hat and white shirt with a red hat", "annotations": [{"polygon": [[138, 326], [168, 331], [155, 413], [133, 408]], "text": "walk", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "npm", "recog_valid": false, "glyph_recog_text": "30一", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[164, 374], [277, 385], [265, 423], [153, 413]], "text": "AS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "A.S", "recog_valid": false, "glyph_recog_text": "AS", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[422, 324], [415, 347], [427, 357], [447, 361], [449, 336]], "text": "LS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "L", "recog_valid": false, "glyph_recog_text": "LS", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[187, 337], [187, 352], [172, 350], [165, 361], [167, 376], [175, 378], [184, 384], [256, 382], [256, 372], [252, 364]], "text": "Ofeet", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "te", "recog_valid": false, "glyph_recog_text": "Ofeet", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570807.jpg", "caption": "a yellow and red building with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570817.jpg", "caption": "a tv with a tv show on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177604.jpg", "caption": "a black and white photo of an old building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046535.jpg", "caption": "a toy pug dog with a pink keychain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046537.jpg", "caption": "the los angeles kings celebrate their victory in the nhl finals", "annotations": [{"polygon": [[234, 208], [234, 208], [231, 234], [274, 226], [266, 204]], "text": "Line", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jine", "recog_valid": false, "glyph_recog_text": "Line", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[193, 237], [192, 259], [287, 247], [285, 226]], "text": "citysightseeing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GitySightseeing", "recog_valid": false, "glyph_recog_text": "citysightseeing", "glyph_recog_ld": 0.8666667555554963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439758.jpg", "caption": "a laptop computer and a coffee mug on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308701.jpg", "caption": "a bicycle is parked next to a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046563.jpg", "caption": "a man is walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439801.jpg", "caption": "a man in a baseball cap talking on a cell phone", "annotations": [{"polygon": [[310, 355], [317, 352], [324, 354], [331, 362], [340, 366], [351, 339], [342, 337], [339, 339], [319, 335], [314, 338], [306, 353]], "text": "ALS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BNAL5", "recog_valid": false, "glyph_recog_text": "ALS", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570879.jpg", "caption": "a plate with a sandwich and chips on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439822.jpg", "caption": "a man sitting at a table in a train car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177680.jpg", "caption": "a man in red shirt swinging a bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046630.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[126, 214], [136, 242], [149, 243], [170, 241], [173, 234], [172, 218], [159, 212], [132, 211]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "30", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308774.jpg", "caption": "a woman on a skateboard in front of a graffiti wall", "annotations": [{"polygon": [[281, 167], [271, 214], [332, 225], [385, 215], [397, 203], [390, 187], [283, 169]], "text": "HAEIER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIeep", "recog_valid": false, "glyph_recog_text": "HAEIER", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[387, 264], [387, 294], [424, 295], [427, 260]], "text": "SC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "60", "recog_valid": false, "glyph_recog_text": "Sc", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439855.jpg", "caption": "cauliflower at the supermarket", "annotations": [{"polygon": [[368, 188], [368, 188], [370, 208], [392, 236], [413, 235], [432, 188], [368, 185]], "text": "79 ", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "20", "recog_valid": false, "glyph_recog_text": "79", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570938.jpg", "caption": "a kitchen with a stove and oven in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308796.jpg", "caption": "a family is loading luggage into a car", "annotations": [{"polygon": [[0, 320], [53, 316], [51, 289], [0, 291]], "text": "VBL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "VBL", "recog_valid": true, "glyph_recog_text": "VBL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439890.jpg", "caption": "a fire truck driving down a street with a cloudy sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439896.jpg", "caption": "a woman riding a scooter in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308825.jpg", "caption": "a computer mouse and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046686.jpg", "caption": "a broken tv on a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570978.jpg", "caption": "a yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439938.jpg", "caption": "a group of people walking on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046731.jpg", "caption": "a computer keyboard, mouse, and cd-rom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177807.jpg", "caption": "a baseball player throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046737.jpg", "caption": "a blue truck is driving down the street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177817.jpg", "caption": "a motorcycle parked on the street", "annotations": [{"polygon": [[155, 273], [175, 293], [158, 304], [137, 282]], "text": "4919", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4919", "recog_valid": true, "glyph_recog_text": "4919", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439991.jpg", "caption": "a group of people standing on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046786.jpg", "caption": "a stop sign with a white background", "annotations": [{"polygon": [[25, 202], [14, 299], [266, 302], [266, 213]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308936.jpg", "caption": "a woman wearing a hat playing a cello", "annotations": [{"polygon": [[316, 241], [417, 239], [415, 207], [318, 201]], "text": "ent", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jent/", "recog_valid": false, "glyph_recog_text": "ent", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046809.jpg", "caption": "a toothbrush and toothpaste in a cup", "annotations": [{"polygon": [[130, 247], [86, 428], [66, 426], [106, 214]], "text": "Aquafresh", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2ER6y", "recog_valid": false, "glyph_recog_text": "Aquafresh", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308954.jpg", "caption": "two helicopters flying in the sky with one in the front and one in the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046813.jpg", "caption": "a white van with bananas and bags of other produce", "annotations": [{"polygon": [[363, 169], [360, 204], [433, 204], [435, 169]], "text": "BAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BAR", "recog_valid": true, "glyph_recog_text": "BAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308967.jpg", "caption": "a woman in blue shirt and black shorts is jumping with a tennis racket", "annotations": [{"polygon": [[251, 217], [245, 256], [269, 262], [276, 224]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177896.jpg", "caption": "two people skateboarding on a wet road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177899.jpg", "caption": "a building with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177915.jpg", "caption": "two men standing on a tennis court holding tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177917.jpg", "caption": "a stuffed bear sitting in a car with a stuffed animal", "annotations": [{"polygon": [[240, 217], [317, 242], [308, 266], [221, 252]], "text": "GAIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GAT", "recog_valid": false, "glyph_recog_text": "GAIN", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571146.jpg", "caption": "a woman playing a video game on a wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571149.jpg", "caption": "a woman is throwing a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177947.jpg", "caption": "a store front with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177957.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "annotations": [{"polygon": [[87, 193], [86, 286], [425, 289], [424, 191]], "text": "RIAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RIEAS", "recog_valid": false, "glyph_recog_text": "RIAS", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571174.jpg", "caption": "a young man in a hoodie and jeans walking on a sidewalk", "annotations": [{"polygon": [[384, 412], [393, 415], [418, 419], [441, 422], [455, 421], [469, 414], [509, 387], [507, 374], [442, 374], [403, 392]], "text": "Leland", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "land", "recog_valid": false, "glyph_recog_text": "Leland", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046885.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[83, 419], [83, 513], [307, 511], [304, 451]], "text": "CAMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BAMP", "recog_valid": false, "glyph_recog_text": "CAMP", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177959.jpg", "caption": "a giraffe and a donkey in a zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571199.jpg", "caption": "a red post box and a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440136.jpg", "caption": "a person sitting on a bench with their head down", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178016.jpg", "caption": "two people riding waves in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178018.jpg", "caption": "a woman standing in front of a train car", "annotations": [{"polygon": [[342, 321], [413, 285], [413, 309], [340, 348]], "text": "NO 39 T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N939!", "recog_valid": false, "glyph_recog_text": "NO 39T", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571237.jpg", "caption": "a small airplane sitting on the grass in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178040.jpg", "caption": "people riding bikes down a sidewalk in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178052.jpg", "caption": "a group of young people playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178053.jpg", "caption": "a baseball player holding a bat in front of a net", "annotations": [{"polygon": [[158, 334], [152, 314], [150, 298], [164, 293], [180, 290], [195, 291], [208, 291], [218, 291], [231, 291], [239, 294], [249, 297], [247, 320], [245, 330], [240, 333], [226, 330], [213, 328], [199, 328], [181, 328], [170, 329], [164, 331]], "text": "FIELDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIELOS", "recog_valid": false, "glyph_recog_text": "FIELDS", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[148, 346], [254, 342], [260, 426], [151, 425]], "text": "29", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "29", "recog_valid": true, "glyph_recog_text": "29", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440244.jpg", "caption": "a woman standing next to a mascot on a baseball field", "annotations": [{"polygon": [[295, 303], [292, 278], [341, 265], [346, 282]], "text": "Mets", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Meto", "recog_valid": false, "glyph_recog_text": "Mets", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440262.jpg", "caption": "a woman standing on a stilt with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440269.jpg", "caption": "a street sign with a sign that says yester wy", "annotations": [{"polygon": [[227, 358], [328, 358], [323, 392], [237, 393], [233, 376]], "text": "yesler", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Yesler", "recog_valid": false, "glyph_recog_text": "yesler", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[347, 369], [382, 367], [384, 376], [380, 398], [375, 395], [366, 391], [354, 391], [352, 386]], "text": "WY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Wy", "recog_valid": false, "glyph_recog_text": "WY", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571355.jpg", "caption": "two cows walking down a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047084.jpg", "caption": "a blue airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178171.jpg", "caption": "a city street with cars and buses on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571392.jpg", "caption": "a horse standing in a field with people around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178177.jpg", "caption": "a man sitting on a surfboard on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047116.jpg", "caption": "a sign pointing to different directions in the mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178192.jpg", "caption": "a desk with two computers and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440339.jpg", "caption": "three women are holding up cakes and one is smiling", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178207.jpg", "caption": "a man playing tennis on a clay court", "annotations": [{"polygon": [[49, 189], [50, 210], [172, 185], [170, 163]], "text": "ConiServizi", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ConiServizi", "recog_valid": true, "glyph_recog_text": "ConiServizi", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571431.jpg", "caption": "a kitchen with white cabinets and black counter tops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571451.jpg", "caption": "a table with two sandwiches and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178242.jpg", "caption": "a bus is parked on the side of the road", "annotations": [{"polygon": [[373, 419], [363, 445], [380, 445], [384, 439], [393, 438], [501, 438], [509, 421], [459, 414], [419, 414]], "text": "yakobus", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "yakobus", "recog_valid": true, "glyph_recog_text": "yakobus", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309319.jpg", "caption": "a man in a hat and sunglasses playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440401.jpg", "caption": "a cell phone and an ipod sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571503.jpg", "caption": "three old war planes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047221.jpg", "caption": "a crowd of people standing in line to get food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571541.jpg", "caption": "a row of buses parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571548.jpg", "caption": "a stop sign and a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309409.jpg", "caption": "a man and woman under an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571562.jpg", "caption": "a black and white photo of a bed with pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571563.jpg", "caption": "a group of people on skis on a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047281.jpg", "caption": "a street sign with a bunch of stickers on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571573.jpg", "caption": "a three tiered cake with wine glasses and roses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571575.jpg", "caption": "a group of people playing music under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047294.jpg", "caption": "a group of people on motorcycles with signs and a police officer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047295.jpg", "caption": "a large clock on a pedestal in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309461.jpg", "caption": "a baseball player running to first base", "annotations": [{"polygon": [[246, 193], [255, 223], [290, 212], [294, 223], [300, 218], [298, 194], [285, 189], [249, 191]], "text": "yall", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Talo", "recog_valid": false, "glyph_recog_text": "yall", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047328.jpg", "caption": "a pizza with cheese and vegetables on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178429.jpg", "caption": "a man riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571648.jpg", "caption": "three men in baseball uniforms posing for a photo", "annotations": [{"polygon": [[368, 224], [376, 254], [398, 251], [394, 222]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "W", "recog_valid": true, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178436.jpg", "caption": "two pictures of a desk with a laptop and printer", "annotations": [{"polygon": [[300, 397], [276, 427], [296, 427], [315, 403]], "text": "TE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TE", "recog_valid": true, "glyph_recog_text": "TE", "glyph_recog_ld": 1.0}, {"polygon": [[257, 251], [243, 258], [292, 287], [307, 273]], "text": "acing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cucing", "recog_valid": false, "glyph_recog_text": "acing", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571653.jpg", "caption": "three women in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571654.jpg", "caption": "a desk with two computer monitors and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178438.jpg", "caption": "a man on a skateboard in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047387.jpg", "caption": "a man and his dog riding on a motorcycle", "annotations": [{"polygon": [[411, 202], [411, 216], [493, 183], [486, 172]], "text": "Allen's", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Automotive", "recog_valid": false, "glyph_recog_text": "Atlen's", "glyph_recog_ld": 0.20000079999919995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178494.jpg", "caption": "a boy running to home plate with a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440641.jpg", "caption": "a sandwich with a pickle on it and french fries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309585.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309598.jpg", "caption": "a baseball player is standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440673.jpg", "caption": "a group of people walking down a street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178550.jpg", "caption": "a car parked on the side of a street in the fall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571768.jpg", "caption": "a man in a yellow vest", "annotations": [{"polygon": [[60, 292], [89, 286], [123, 283], [131, 280], [146, 283], [143, 304], [127, 300], [107, 305], [87, 308], [59, 311]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "POLICE", "recog_valid": true, "glyph_recog_text": "POLICE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178557.jpg", "caption": "a man riding a skateboard in the dark", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309635.jpg", "caption": "a shirtless man on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440707.jpg", "caption": "a street sign with a number and a street name", "annotations": [{"polygon": [[251, 190], [299, 157], [297, 144], [291, 144], [249, 175]], "text": "Dainikeihin", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Danikeihin", "recog_valid": false, "glyph_recog_text": "Dainlkeitin", "glyph_recog_ld": 0.7272729752063862}, {"polygon": [[192, 243], [192, 243], [222, 242], [241, 241], [242, 285], [192, 287], [186, 283], [183, 272], [185, 254], [186, 247], [187, 245]], "text": "311", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "311", "recog_valid": true, "glyph_recog_text": "311", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178567.jpg", "caption": "a woman in blue shirt and gray skirt playing tennis", "annotations": [{"polygon": [[368, 70], [362, 136], [402, 140], [407, 74]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LW", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047502.jpg", "caption": "a woman and a baby sitting at a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047507.jpg", "caption": "a plate with a sandwich and soup on it", "annotations": [{"polygon": [[130, 116], [138, 168], [98, 190], [90, 135]], "text": "COSI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Losi", "recog_valid": false, "glyph_recog_text": "oou", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[229, 334], [221, 339], [270, 372], [278, 366], [250, 348]], "text": "DELI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DELICI", "recog_valid": false, "glyph_recog_text": "95-:", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440735.jpg", "caption": "a wooden board topped with four small appetizers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440752.jpg", "caption": "a dalmatian dog sitting in the driver's seat of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047544.jpg", "caption": "a kitchen with a window and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440762.jpg", "caption": "a person playing a video game on a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047550.jpg", "caption": "a young girl holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440771.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[195, 193], [204, 162], [276, 270], [269, 289]], "text": "COVEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COVEN", "recog_valid": true, "glyph_recog_text": "COVEN", "glyph_recog_ld": 1.0}, {"polygon": [[135, 313], [266, 287], [268, 320], [132, 344]], "text": "HAMPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAMPS", "recog_valid": true, "glyph_recog_text": "HAMPS", "glyph_recog_ld": 1.0}, {"polygon": [[274, 283], [272, 317], [362, 298], [364, 264]], "text": "HIRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HIRE", "recog_valid": true, "glyph_recog_text": "HIRE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440786.jpg", "caption": "a red and white airplane flying in the sky", "annotations": [{"polygon": [[288, 251], [329, 218], [297, 211], [250, 248], [278, 258]], "text": "44G", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "K1486", "recog_valid": false, "glyph_recog_text": "44G", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178645.jpg", "caption": "a man on the phone outside a restaurant", "annotations": [{"polygon": [[321, 255], [358, 249], [361, 215], [318, 219]], "text": "Dad", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dod", "recog_valid": false, "glyph_recog_text": "Dad", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[313, 253], [262, 258], [261, 222], [281, 222], [308, 234]], "text": "YOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Youm", "recog_valid": false, "glyph_recog_text": "YOUR", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[348, 274], [352, 295], [298, 303], [298, 305], [379, 296], [380, 277]], "text": "Fathers Day!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "alhers Day", "recog_valid": false, "glyph_recog_text": "Fathers Day", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[367, 217], [362, 244], [370, 249], [394, 249], [416, 244], [413, 217]], "text": "Called", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Called", "recog_valid": true, "glyph_recog_text": "Called", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440792.jpg", "caption": "a clock on a wall with a colorful pattern", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178666.jpg", "caption": "a woman holding an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440811.jpg", "caption": "a row of bicycles parked on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571886.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178672.jpg", "caption": "a red train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178674.jpg", "caption": "a cat sitting in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571895.jpg", "caption": "a man on a skateboard doing a trick", "annotations": [{"polygon": [[329, 329], [346, 276], [417, 280], [400, 346]], "text": "SKA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SKA", "recog_valid": true, "glyph_recog_text": "SKA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178685.jpg", "caption": "two toothbrushes are in a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309774.jpg", "caption": "a motorcycle on display in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571916.jpg", "caption": "people walking along the river in front of buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309781.jpg", "caption": "a motorcycle parked next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047640.jpg", "caption": "a man wearing a tie and sunglasses giving the thumbs up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440877.jpg", "caption": "soccer players in indoor arena playing on artificial turf", "annotations": [{"polygon": [[62, 262], [100, 259], [98, 289], [61, 297], [61, 284]], "text": "SPACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SPACT", "recog_valid": false, "glyph_recog_text": "SPACE", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[59, 262], [104, 258], [104, 230], [58, 226]], "text": "CREATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CREAT", "recog_valid": false, "glyph_recog_text": "CREATE", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178742.jpg", "caption": "a traffic light on a city street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309829.jpg", "caption": "a stop sign and a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440904.jpg", "caption": "a man in a top hat and suit sitting on a rock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440919.jpg", "caption": "a man holding a box with the word bill saskenbank on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178791.jpg", "caption": "a collage of four pictures of a baseball player hitting the ball", "annotations": [{"polygon": [[211, 354], [208, 385], [246, 388], [255, 375], [300, 371], [299, 355], [247, 354]], "text": "tigers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tigous", "recog_valid": false, "glyph_recog_text": "tigers", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309881.jpg", "caption": "three cows standing in a barn with hay", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572036.jpg", "caption": "a man in a red shirt is holding a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047767.jpg", "caption": "a man in a suit and tie holding a slice of pizza", "annotations": [{"polygon": [[321, 164], [317, 206], [462, 205], [465, 143]], "text": "MotorCity", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MeaeanCity", "recog_valid": false, "glyph_recog_text": "MotorCity", "glyph_recog_ld": 0.5000004999995}, {"polygon": [[322, 223], [312, 262], [459, 264], [459, 243]], "text": "Collection", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cellention", "recog_valid": false, "glyph_recog_text": "Collection", "glyph_recog_ld": 0.8000001999998}, {"polygon": [[374, 99], [374, 99], [370, 120], [401, 113], [403, 90]], "text": "Buddy's", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Baditi", "recog_valid": false, "glyph_recog_text": "Budrty's", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309915.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[331, 230], [337, 195], [397, 193], [395, 207]], "text": "Oriolca", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cetsla", "recog_valid": false, "glyph_recog_text": "Oriolca", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047778.jpg", "caption": "a young boy laying down with a stuffed animal", "annotations": [{"polygon": [[309, 352], [352, 349], [360, 379], [312, 378]], "text": "MAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAN", "recog_valid": true, "glyph_recog_text": "MAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047792.jpg", "caption": "a green double decker bus parked next to a white bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441013.jpg", "caption": "a refrigerator with magnets on the door and a magnet on the fridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178871.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309948.jpg", "caption": "a person holding a red iphone in their hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047804.jpg", "caption": "a person riding a wave on a surfboard", "annotations": [{"polygon": [[425, 374], [422, 358], [433, 346], [487, 347], [492, 364], [483, 380], [473, 384], [466, 384]], "text": "my", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "my", "recog_valid": true, "glyph_recog_text": "m y", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309956.jpg", "caption": "a birthday cake with a truck on it", "annotations": [{"polygon": [[22, 159], [11, 113], [125, 80], [136, 118], [136, 118]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Happs", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[6, 213], [1, 161], [159, 109], [159, 109], [171, 143]], "text": "Birthday,", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Birthdod", "recog_valid": false, "glyph_recog_text": "Birthday,", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[423, 314], [416, 100], [453, 103], [468, 310], [468, 310]], "text": "CAMERONI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "UT", "recog_valid": false, "glyph_recog_text": "RO", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[372, 135], [402, 163], [390, 171], [366, 140]], "text": "IRON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOXT", "recog_valid": false, "glyph_recog_text": "IRON", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[246, 395], [241, 414], [278, 395], [278, 382]], "text": "happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "hrppy", "recog_valid": false, "glyph_recog_text": "happy", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575227.jpg", "caption": "a woman riding a bike past a building with a door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313097.jpg", "caption": "a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182029.jpg", "caption": "state theatre neon sign", "annotations": [{"polygon": [[298, 234], [275, 218], [251, 218], [225, 229], [216, 235], [201, 211], [219, 197], [256, 185], [282, 189], [307, 202], [317, 211]], "text": "STATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STATA", "recog_valid": false, "glyph_recog_text": "STATE", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[172, 323], [187, 347], [215, 364], [240, 369], [278, 361], [300, 347], [316, 330], [321, 320], [319, 308], [308, 299], [300, 310], [285, 328], [267, 338], [247, 341], [219, 333], [208, 322], [192, 300]], "text": "THEATRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "THEATRE", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575252.jpg", "caption": "a motorcycle racer is riding on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182054.jpg", "caption": "a person driving a car with a cow in the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575274.jpg", "caption": "a person walking past a bicycle with a sign", "annotations": [{"polygon": [[331, 200], [329, 228], [417, 225], [417, 198]], "text": "Califo", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Califor", "recog_valid": false, "glyph_recog_text": "Califo", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313131.jpg", "caption": "pullman bus - beirut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050992.jpg", "caption": "a man sitting on a bus looking at fish", "annotations": [{"polygon": [[148, 219], [186, 233], [183, 250], [146, 236]], "text": "Iceland", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "keland", "recog_valid": false, "glyph_recog_text": "lcetand", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444210.jpg", "caption": "a white and yellow airplane", "annotations": [{"polygon": [[134, 148], [107, 193], [167, 200], [191, 154]], "text": "Delta", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Delta", "recog_valid": true, "glyph_recog_text": "Delta", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444214.jpg", "caption": "a group of people standing together in front of a screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444235.jpg", "caption": "a bus driving down a street with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182098.jpg", "caption": "a cat sitting on a bed", "annotations": [{"polygon": [[121, 134], [123, 172], [144, 176], [152, 178], [165, 186], [170, 153], [152, 140], [141, 138]], "text": "RATS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RATS", "recog_valid": true, "glyph_recog_text": "RATS", "glyph_recog_ld": 1.0}, {"polygon": [[33, 147], [48, 188], [68, 180], [84, 174], [97, 173], [108, 172], [116, 173], [114, 134], [96, 133], [70, 134], [56, 139]], "text": "GIANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GIAJ", "recog_valid": false, "glyph_recog_text": "GIANT", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182104.jpg", "caption": "people sitting on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444257.jpg", "caption": "a brown owl sitting on top of a tree stump", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313186.jpg", "caption": "a young boy holding a kite with a shark on it", "annotations": [{"polygon": [[184, 246], [204, 249], [230, 250], [229, 278], [199, 274], [184, 273], [180, 271]], "text": "SA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SA", "recog_valid": true, "glyph_recog_text": "SA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051052.jpg", "caption": "a man on a scooter", "annotations": [{"polygon": [[184, 147], [193, 141], [172, 105], [163, 112]], "text": "BROOK ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AOOHE", "recog_valid": false, "glyph_recog_text": "PROOK", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182127.jpg", "caption": "a large jet airplane flying through the air", "annotations": [{"polygon": [[244, 188], [258, 170], [263, 177], [268, 170], [265, 163], [285, 142], [281, 120], [278, 124], [274, 131], [237, 172], [237, 179], [242, 188]], "text": "allegiant", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aliegiant.", "recog_valid": false, "glyph_recog_text": "allegiant", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051067.jpg", "caption": "a man standing next to a stop sign", "annotations": [{"polygon": [[229, 63], [155, 86], [155, 105], [230, 83]], "text": "POOPDECK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "POOPDECK", "recog_valid": true, "glyph_recog_text": "POOPDECK", "glyph_recog_ld": 1.0}, {"polygon": [[184, 176], [232, 198], [232, 236], [184, 224]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051076.jpg", "caption": "a stop sign on a street corner in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051106.jpg", "caption": "an old train car on the tracks with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313256.jpg", "caption": "an old train car sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575417.jpg", "caption": "a man and a woman sitting at a table with boxes of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444350.jpg", "caption": "a man is catching a frisbee in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313278.jpg", "caption": "a group of girls playing soccer on a field", "annotations": [{"polygon": [[139, 200], [140, 218], [141, 232], [145, 239], [159, 235], [161, 232], [174, 232], [183, 232], [186, 228], [189, 193], [182, 190], [166, 191], [150, 195]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313286.jpg", "caption": "a group of elephants walking down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575432.jpg", "caption": "a train is pulling into a station with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051156.jpg", "caption": "a group of umbrellas on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444373.jpg", "caption": "1940 lorries & coaches lorries & coaches", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182243.jpg", "caption": "a teddy bear laying on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444397.jpg", "caption": "a slice of pizza on a plate", "annotations": [{"polygon": [[300, 96], [314, 95], [325, 101], [330, 108], [333, 111], [336, 125], [338, 131], [330, 135], [328, 128], [325, 121], [320, 112], [310, 107], [302, 108], [298, 105], [297, 100]], "text": "CASTLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "JSTL", "recog_valid": false, "glyph_recog_text": "CASTLE", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575476.jpg", "caption": "a man holding a tray of donuts and a bag of chips", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444409.jpg", "caption": "a person's feet are on a desk with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575486.jpg", "caption": "a vase with dried flowers in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575490.jpg", "caption": "a black and white photo of a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575498.jpg", "caption": "a desk with two computers and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313360.jpg", "caption": "a mountain lion is seen in the dark on a camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313362.jpg", "caption": "a table and benches in a park at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182300.jpg", "caption": "a white wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444445.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[254, 229], [266, 231], [272, 232], [280, 239], [279, 260], [266, 257], [258, 256], [244, 254]], "text": "43", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "43", "recog_valid": true, "glyph_recog_text": "43", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444453.jpg", "caption": "a parking meter with a blue and green scarf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313397.jpg", "caption": "a woman sitting on a bench reading a book", "annotations": [{"polygon": [[66, 244], [257, 245], [259, 285], [67, 287]], "text": "Wynyard", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wynyard", "recog_valid": true, "glyph_recog_text": "Wynyard", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051260.jpg", "caption": "a skateboarder is doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182335.jpg", "caption": "a group of people sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051291.jpg", "caption": "a parking meter on the side of the road", "annotations": [{"polygon": [[212, 107], [253, 105], [258, 138], [213, 137]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}, {"polygon": [[361, 169], [361, 187], [399, 163], [397, 150]], "text": "5BCE270", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "58EZW", "recog_valid": false, "glyph_recog_text": "5BCE270", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[157, 106], [157, 136], [200, 139], [204, 122], [199, 107]], "text": "00:23", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "00", "recog_valid": false, "glyph_recog_text": "00:23", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182367.jpg", "caption": "a man pouring water into a red and white container", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051302.jpg", "caption": "two women sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051310.jpg", "caption": "metrolink train at the station", "annotations": [{"polygon": [[163, 172], [170, 177], [212, 115], [206, 109]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "ENEhNg", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[352, 238], [352, 269], [510, 248], [510, 212]], "text": "METROLI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "METROLII", "recog_valid": false, "glyph_recog_text": "METROLI", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575627.jpg", "caption": "a group of children eating pizza at a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051342.jpg", "caption": "a woman holding an umbrella walks past a graffiti covered building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313502.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575652.jpg", "caption": "a young boy is mixing eggs in a blender", "annotations": [{"polygon": [[65, 392], [97, 365], [100, 370], [68, 399]], "text": "KENWOOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KENWOO", "recog_valid": false, "glyph_recog_text": "apeiic", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182444.jpg", "caption": "a baseball game in progress with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444591.jpg", "caption": "a wooden clock on a shelf in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313532.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182468.jpg", "caption": "a flower vase with a can of soda on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313541.jpg", "caption": "a man is playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444619.jpg", "caption": "a group of people riding motorcycles in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182502.jpg", "caption": "two men sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182505.jpg", "caption": "a man in a red shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182507.jpg", "caption": "a sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313596.jpg", "caption": "a pink donut with sprinkles and a cup of coffee", "annotations": [{"polygon": [[375, 30], [373, 54], [386, 59], [402, 66], [419, 69], [436, 73], [450, 74], [466, 77], [475, 77], [509, 80], [511, 74], [513, 54], [483, 53], [451, 50], [422, 45], [398, 39]], "text": "DUNKIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "DUNKIN", "recog_valid": true, "glyph_recog_text": "DUNKIN", "glyph_recog_ld": 1.0}, {"polygon": [[373, 61], [372, 82], [382, 86], [399, 94], [418, 98], [432, 102], [451, 105], [475, 107], [495, 108], [505, 108], [512, 86], [506, 83], [475, 83], [447, 80], [419, 75], [400, 70], [380, 62], [373, 59]], "text": "DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "DONUTS", "recog_valid": true, "glyph_recog_text": "DONUTS", "glyph_recog_ld": 1.0}, {"polygon": [[364, 149], [364, 156], [372, 163], [390, 173], [426, 184], [453, 188], [453, 183], [432, 179], [415, 173], [389, 164], [375, 158]], "text": "DunkinDonuts.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "hkinDonuts.com", "recog_valid": false, "glyph_recog_text": "CaraieClonuts.cam", "glyph_recog_ld": 0.5294120415223285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051474.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575761.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444744.jpg", "caption": "a pizza on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575835.jpg", "caption": "a woman eating a doughnut", "annotations": [{"polygon": [[480, 83], [468, 118], [512, 110], [512, 82]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444769.jpg", "caption": "a gps device showing the location of a car", "annotations": [{"polygon": [[132, 130], [130, 160], [197, 161], [197, 132]], "text": "East", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "East", "recog_valid": true, "glyph_recog_text": "East", "glyph_recog_ld": 1.0}, {"polygon": [[283, 206], [298, 202], [328, 270], [315, 276]], "text": "RAINBOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAINEIOW", "recog_valid": false, "glyph_recog_text": "RAINBOW", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[332, 378], [331, 412], [397, 409], [394, 375]], "text": "275", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "275", "recog_valid": true, "glyph_recog_text": "275", "glyph_recog_ld": 1.0}, {"polygon": [[59, 379], [56, 412], [123, 411], [123, 379]], "text": "1:24", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "1:24", "recog_valid": true, "glyph_recog_text": "1:24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313694.jpg", "caption": "a baby smiling while holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182634.jpg", "caption": "a baseball player is swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313709.jpg", "caption": "a man and woman standing next to a subway train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182639.jpg", "caption": "a crowd of people standing around a large kite", "annotations": [{"polygon": [[188, 132], [211, 102], [275, 253], [257, 284]], "text": "124 GOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QODA2I", "recog_valid": false, "glyph_recog_text": "124 GOD", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[272, 313], [288, 278], [326, 361], [316, 379]], "text": "KITE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3T", "recog_valid": false, "glyph_recog_text": "KITE", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[339, 468], [338, 513], [440, 511], [438, 470]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ldfyehe", "recog_valid": false, "glyph_recog_text": "Photography", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182642.jpg", "caption": "a motorcycle with a black tank and a white tank", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313720.jpg", "caption": "a man standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444794.jpg", "caption": "a woman in a kitchen holding a bowl of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313727.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575873.jpg", "caption": "a man dressed in a suit and hat with blood on his face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444809.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[166, 273], [176, 319], [292, 325], [290, 283], [198, 272]], "text": "VANS ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VANG", "recog_valid": false, "glyph_recog_text": "VANS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444830.jpg", "caption": "a woman standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051618.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051620.jpg", "caption": "a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182706.jpg", "caption": "a man riding a horse drawn carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051643.jpg", "caption": "a man on a bicycle carrying bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313792.jpg", "caption": "a man in a wet suit holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182728.jpg", "caption": "a man and boy on a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313847.jpg", "caption": "a young boy is swinging a baseball bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576012.jpg", "caption": "a red double decker bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313873.jpg", "caption": "three young men sitting on a bench with skateboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576026.jpg", "caption": "a yellow train engine sitting on the tracks", "annotations": [{"polygon": [[129, 208], [129, 230], [160, 224], [161, 199]], "text": "252", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "252", "recog_valid": true, "glyph_recog_text": "252", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444952.jpg", "caption": "a man with a camera and a cell phone", "annotations": [{"polygon": [[163, 81], [163, 81], [200, 65], [204, 76], [182, 116], [177, 118]], "text": "T-", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "T.", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[175, 120], [189, 110], [216, 106], [214, 148], [199, 154], [172, 161]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444953.jpg", "caption": "two men playing tennis on a tennis court", "annotations": [{"polygon": [[206, 121], [202, 156], [387, 161], [388, 126], [246, 121]], "text": "J.P. Morgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "J.PMorgan", "recog_valid": false, "glyph_recog_text": "J.P. Morgan", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182821.jpg", "caption": "a bowl of soup with chopsticks and shrimp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182828.jpg", "caption": "a person skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051777.jpg", "caption": "a dog is walking under a bench in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051783.jpg", "caption": "a bird sitting on a branch with a blue sky in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313927.jpg", "caption": "two dogs sitting in the back of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445006.jpg", "caption": "a blue bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313956.jpg", "caption": "a plate with fruit on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576119.jpg", "caption": "a church with a cross on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576122.jpg", "caption": "a group of three double decker buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313984.jpg", "caption": "a pair of scissors and a ruler on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445075.jpg", "caption": "a white suv is parked at a railroad crossing", "annotations": [{"polygon": [[101, 147], [133, 119], [137, 123], [104, 152], [99, 148]], "text": "RAIL ROAD CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CROSSINE", "recog_valid": false, "glyph_recog_text": "315-1-24921", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445080.jpg", "caption": "a blue and silver motorcycle parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182937.jpg", "caption": "a snowboarder is doing a trick on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182947.jpg", "caption": "a man and a woman sitting with a child at a picnic table", "annotations": [{"polygon": [[45, 300], [43, 276], [107, 280], [109, 306]], "text": "ALUMINI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ALUMNI", "recog_valid": false, "glyph_recog_text": "ALUMINI", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[10, 274], [14, 308], [47, 310], [44, 276]], "text": "UCAA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "UCA", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[33, 342], [33, 369], [52, 362], [71, 364], [89, 374], [92, 350], [69, 340], [50, 339]], "text": "UCA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "UCA", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182956.jpg", "caption": "a man standing in a bathroom", "annotations": [{"polygon": [[307, 60], [317, 52], [346, 41], [369, 38], [413, 48], [428, 57], [422, 72], [381, 55], [354, 56], [312, 72]], "text": "NEWCASTLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LONEAO", "recog_valid": false, "glyph_recog_text": "NEWCASTLE", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576188.jpg", "caption": "two soccer players are playing on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576187.jpg", "caption": "two police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314050.jpg", "caption": "a group of people walking on a train platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314069.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[234, 125], [233, 155], [308, 156], [309, 126]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445143.jpg", "caption": "the contents of a brush, toothbrush, and toothpaste", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576212.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183007.jpg", "caption": "a young baseball player standing on a field", "annotations": [{"polygon": [[146, 215], [151, 190], [162, 188], [173, 189], [171, 198], [225, 202], [217, 210], [212, 212], [163, 219], [150, 220]], "text": "Slammer", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Slamne", "recog_valid": false, "glyph_recog_text": "Slammer", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183012.jpg", "caption": "a reflection of a stop sign and a street sign", "annotations": [{"polygon": [[114, 115], [123, 141], [187, 120], [220, 110], [232, 106], [228, 84], [213, 90], [180, 101], [160, 109], [134, 115], [128, 109]], "text": "Commo", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Comrmo", "recog_valid": false, "glyph_recog_text": "Commo", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[249, 99], [264, 96], [276, 91], [292, 86], [306, 80], [299, 64], [292, 62], [286, 68], [271, 72], [256, 78], [248, 81], [241, 77], [240, 77]], "text": "jore", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "lere", "recog_valid": false, "glyph_recog_text": "jore", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[213, 215], [241, 207], [265, 196], [273, 197], [298, 190], [306, 185], [333, 177], [347, 198], [331, 207], [337, 221], [228, 259]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051941.jpg", "caption": "a lamp, a vase, and a clock on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314092.jpg", "caption": "a woman crossing the street with a camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445175.jpg", "caption": "a desk with two laptops and a computer on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314109.jpg", "caption": "a bus driving down a city street with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183044.jpg", "caption": "a group of people holding a banner that says lathatos kosk", "annotations": [{"polygon": [[256, 239], [256, 239], [255, 272], [403, 258], [401, 229], [401, 229]], "text": "LESZBiKUSOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LESZBiKUSOK", "recog_valid": true, "glyph_recog_text": "LESZBiKUSOK", "glyph_recog_ld": 1.0}, {"polygon": [[81, 240], [82, 292], [82, 292], [248, 275], [246, 230], [81, 239], [81, 239]], "text": "LATHATO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LATHATO", "recog_valid": true, "glyph_recog_text": "LATHATO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051982.jpg", "caption": "a young man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051985.jpg", "caption": "a taxi cab is parked in the middle of a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051990.jpg", "caption": "a train is on the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445220.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183080.jpg", "caption": "logitech wireless keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052010.jpg", "caption": "a person holding a cell phone with a map on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052012.jpg", "caption": "a table with fruit and other food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314167.jpg", "caption": "a blender with bananas and other food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576317.jpg", "caption": "a toilet seat with a hole in it", "annotations": [{"polygon": [[18, 151], [28, 154], [46, 135], [72, 129], [81, 142], [91, 139], [90, 126], [123, 119], [118, 78], [15, 108], [8, 132], [8, 140]], "text": "CitiKitty", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ci", "recog_valid": false, "glyph_recog_text": "CitiKitty", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183112.jpg", "caption": "a grill with hot dogs and hamburgers on it", "annotations": [{"polygon": [[118, 179], [118, 179], [158, 164], [168, 182], [130, 198]], "text": "-SHS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHS", "recog_valid": false, "glyph_recog_text": "-SHS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314187.jpg", "caption": "pizza hut buffet - the best pizza buffet in the world", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445263.jpg", "caption": "a woman laying in a tent with her dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314194.jpg", "caption": "a motorcycle is on display at a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183123.jpg", "caption": "a yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314203.jpg", "caption": "a computer monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576389.jpg", "caption": "a table with wine glasses and a menu", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445325.jpg", "caption": "a rack of snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314257.jpg", "caption": "three men are working on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445338.jpg", "caption": "a kitchen with a refrigerator, stove and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052123.jpg", "caption": "a woman sitting on the ground next to a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052131.jpg", "caption": "a plate of salad and a newspaper on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576417.jpg", "caption": "a small airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314279.jpg", "caption": "a stop sign and a street sign on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576430.jpg", "caption": "a box of donuts with sprinkles and pink icing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183219.jpg", "caption": "a woman eating a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183236.jpg", "caption": "a desk with three monitors and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576468.jpg", "caption": "a skier in the air with his skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445405.jpg", "caption": "two young men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052193.jpg", "caption": "a tennis player in purple shirt and black shorts", "annotations": [{"polygon": [[357, 251], [515, 268], [513, 353], [336, 321]], "text": "BAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BAR", "recog_valid": true, "glyph_recog_text": "BAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183292.jpg", "caption": "a microwave oven above a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576518.jpg", "caption": "a baseball player swinging at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314377.jpg", "caption": "a train traveling down a river with trees and mountains in the background", "annotations": [{"polygon": [[460, 384], [505, 384], [505, 424], [461, 424]], "text": "p", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "p", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576527.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445461.jpg", "caption": "a parking meter on a sidewalk next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052250.jpg", "caption": "a man bending over to hit a baseball with a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314398.jpg", "caption": "michael jordan dunks the ball in the air", "annotations": [{"polygon": [[287, 124], [289, 144], [397, 125], [394, 104]], "text": "Gatorade", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Gatorade", "recog_valid": true, "glyph_recog_text": "Gatorade", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576543.jpg", "caption": "two trains are on the tracks with a man standing next to them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052256.jpg", "caption": "a street sign with a green light and a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183327.jpg", "caption": "a road closed sign is sitting on a pile of dirt", "annotations": [{"polygon": [[429, 336], [313, 338], [312, 368], [429, 361]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314414.jpg", "caption": "a group of people standing around looking at a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445491.jpg", "caption": "a sign has texts", "annotations": [{"polygon": [[160, 241], [225, 231], [229, 252], [161, 264]], "text": "REPTILE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REPTILE", "recog_valid": true, "glyph_recog_text": "REPTILE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445493.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183360.jpg", "caption": "a steam engine train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314436.jpg", "caption": "a building with a large sign on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314439.jpg", "caption": "a group of motorcycles parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052299.jpg", "caption": "people are standing in line to buy food from food trucks", "annotations": [{"polygon": [[362, 323], [364, 334], [465, 370], [463, 360], [422, 344]], "text": "TAQUEROFUSION.COM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "TWOUEROSiON 55", "recog_valid": false, "glyph_recog_text": "TNOUNEROPUBNONCOAM", "glyph_recog_ld": 0.4444447530862483}, {"polygon": [[350, 271], [346, 277], [372, 301], [377, 295]], "text": "CHICAGO'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "PRINOE", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[373, 236], [372, 256], [414, 266], [441, 274], [448, 267], [450, 246], [450, 238], [441, 226], [425, 240]], "text": "TAQUERO FUSION", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "uoon", "recog_valid": false, "glyph_recog_text": "TNaaoAneo", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445522.jpg", "caption": "a woman is kissing a giraffe at a zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052320.jpg", "caption": "a pair of scissors sitting on top of a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183392.jpg", "caption": "a plate with rice, eggs and sausage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445540.jpg", "caption": "a blackberry, a blackberry, and a blackberry", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052324.jpg", "caption": "a man in a red shirt and tie standing in front of a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183412.jpg", "caption": "a large airplane parked at an airport with a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183413.jpg", "caption": "a bench sitting on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183427.jpg", "caption": "a woman sitting at a table with a pizza and a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314530.jpg", "caption": "a man in a hat and jacket talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445605.jpg", "caption": "a wall with clocks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052406.jpg", "caption": "a truck with logs in the back", "annotations": [{"polygon": [[214, 407], [214, 445], [312, 448], [494, 402], [481, 359]], "text": "TOYOTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TOYOTA", "recog_valid": true, "glyph_recog_text": "TOYOTA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445639.jpg", "caption": "a bus with a bicycle on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052426.jpg", "caption": "a man with tattoos holding a glass of wine", "annotations": [{"polygon": [[388, 263], [380, 284], [422, 302], [428, 279]], "text": "GONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GONE", "recog_valid": true, "glyph_recog_text": "GONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314570.jpg", "caption": "people standing next to a bus with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052437.jpg", "caption": "a lunch box with rice, eggs, oranges and a drink", "annotations": [{"polygon": [[75, 412], [68, 327], [92, 329], [106, 417]], "text": "BUENO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ouang", "recog_valid": false, "glyph_recog_text": "mDwz", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052440.jpg", "caption": "an advertisement for the new samsonite silouette", "annotations": [{"polygon": [[132, 435], [129, 465], [290, 463], [287, 437]], "text": "Samsonite", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Samsonite", "recog_valid": true, "glyph_recog_text": "Samsonite", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576752.jpg", "caption": "a man is standing in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576756.jpg", "caption": "a black train engine sitting in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314614.jpg", "caption": "a man sitting at a desk eating a pizza", "annotations": [{"polygon": [[377, 117], [382, 133], [438, 89], [436, 77], [423, 79], [405, 94]], "text": "TION", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "ONTION", "recog_valid": false, "glyph_recog_text": "TION", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314626.jpg", "caption": "a laptop computer and a printer on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052484.jpg", "caption": "a slice of pizza and a can of soda on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052490.jpg", "caption": "a table with vegetables, cheese, and bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576780.jpg", "caption": "a pepsi stand with a lot of fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314681.jpg", "caption": "a red and white airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445759.jpg", "caption": "a toothbrush and toothpaste in a plastic package", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183617.jpg", "caption": "a street sign with the words wetherford and lawford", "annotations": [{"polygon": [[231, 222], [238, 241], [312, 212], [310, 196]], "text": "Wreford", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wreford", "recog_valid": true, "glyph_recog_text": "Wreford", "glyph_recog_ld": 1.0}, {"polygon": [[128, 232], [129, 248], [183, 285], [180, 266]], "text": "Lawton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lawton", "recog_valid": true, "glyph_recog_text": "Lawlon", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445774.jpg", "caption": "a pug wearing a green tie and a green frame", "annotations": [{"polygon": [[253, 445], [298, 447], [301, 476], [246, 473]], "text": "J'm", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "J'm", "recog_valid": true, "glyph_recog_text": "J'm", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445775.jpg", "caption": "a sign has texts", "annotations": [{"polygon": [[207, 182], [247, 196], [243, 213], [239, 211], [235, 206], [205, 199]], "text": "Henry", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Henry", "recog_valid": true, "glyph_recog_text": "Henry", "glyph_recog_ld": 1.0}, {"polygon": [[198, 195], [197, 218], [234, 226], [234, 206]], "text": "Henr Ford", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ford", "recog_valid": false, "glyph_recog_text": "Nant tord", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314703.jpg", "caption": "a baseball game with a crowd watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445777.jpg", "caption": "a man eating a sandwich at a table", "annotations": [{"polygon": [[505, 286], [479, 281], [473, 291], [480, 314], [489, 317], [494, 304]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "e", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314714.jpg", "caption": "a woman kneeling down to feed a dog a bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052582.jpg", "caption": "a woman riding a horse in a polo match", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314734.jpg", "caption": "a chicken wrap on a cutting board with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576895.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445845.jpg", "caption": "a black and white photo of a large airplane flying over a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052634.jpg", "caption": "a woman standing next to a large vase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314788.jpg", "caption": "two airplanes are taking off from a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052648.jpg", "caption": "a man dressed as a tourist holding a suitcase", "annotations": [{"polygon": [[194, 186], [226, 159], [255, 205], [225, 229]], "text": "DES=E", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DES", "recog_valid": false, "glyph_recog_text": "DES=E", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[84, 161], [189, 163], [189, 236], [81, 243]], "text": "na", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": false, "glyph_recog_text": "na", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[232, 242], [261, 223], [329, 334], [296, 360]], "text": "S=ENMIENDA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENMIEND", "recog_valid": false, "glyph_recog_text": "S=ENMIENDA", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[312, 377], [341, 353], [363, 394], [334, 412]], "text": "SI", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SI", "recog_valid": true, "glyph_recog_text": "SI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576938.jpg", "caption": "a blue surfboard sits on the beach next to a man", "annotations": [{"polygon": [[155, 402], [267, 386], [359, 377], [361, 393], [266, 410], [159, 427]], "text": "LIFEGUARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "UTPCoM/1-1", "recog_valid": false, "glyph_recog_text": "LIFEGUARD", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445879.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314816.jpg", "caption": "a street light is on in front of a theater", "annotations": [{"polygon": [[293, 271], [300, 249], [296, 155], [292, 97], [279, 98], [262, 129], [264, 206], [266, 252], [277, 270]], "text": "JOY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "503", "recog_valid": false, "glyph_recog_text": "-0>", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576963.jpg", "caption": "a woman in white shirt and blue shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445906.jpg", "caption": "a green cell phone, a black cell phone, and a white cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183805.jpg", "caption": "a group of men eating hot dogs", "annotations": [{"polygon": [[0, 218], [0, 231], [37, 212], [32, 201]], "text": "Nathan's", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "lathene", "recog_valid": false, "glyph_recog_text": "isthan's", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183809.jpg", "caption": "a green double decker bus parked in a lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577035.jpg", "caption": "a woman walks across the street in front of a coca cola truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052751.jpg", "caption": "two baseball players high five after a home run", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183827.jpg", "caption": "harry potter's room in the harry potter museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314904.jpg", "caption": "a black and white photo of men playing basketball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577072.jpg", "caption": "a horse race with people watching from the stands", "annotations": [{"polygon": [[426, 238], [424, 251], [511, 284], [512, 269]], "text": "yorkracecourse.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "yorkracecourse.", "recog_valid": true, "glyph_recog_text": "yorkacecourse", "glyph_recog_ld": 0.8666667555554963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577076.jpg", "caption": "a man on a skateboard doing a trick on a concrete bench", "annotations": [{"polygon": [[165, 282], [436, 287], [469, 391], [46, 384]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1 2", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314944.jpg", "caption": "two men standing next to each other", "annotations": [{"polygon": [[268, 311], [259, 339], [290, 362], [297, 340]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1L", "recog_valid": false, "glyph_recog_text": "12", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[266, 212], [252, 261], [308, 298], [331, 245]], "text": "AMI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "g", "recog_valid": false, "glyph_recog_text": "AMI", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[197, 241], [197, 269], [233, 272], [235, 242]], "text": "TES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TE", "recog_valid": false, "glyph_recog_text": "TES", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052802.jpg", "caption": "a large red semi truck parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446033.jpg", "caption": "a statue of a bird with a clock in front of a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183890.jpg", "caption": "a group of people kiteboarding on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314964.jpg", "caption": "two double decker buses parked under a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446034.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314968.jpg", "caption": "a man on a skateboard in a skate park", "annotations": [{"polygon": [[431, 160], [296, 176], [303, 221], [441, 209]], "text": "WONKA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WOAA", "recog_valid": false, "glyph_recog_text": "WONKA", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[429, 280], [479, 264], [510, 294], [512, 361]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[8, 328], [37, 303], [61, 327], [28, 360]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}, {"polygon": [[159, 335], [139, 345], [146, 357], [164, 366], [209, 378], [225, 374], [228, 364], [224, 360]], "text": "CEG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QED", "recog_valid": false, "glyph_recog_text": "CEG", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577125.jpg", "caption": "a man riding a skateboard on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314993.jpg", "caption": "a wooden bench with a sign on it in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183923.jpg", "caption": "a skateboarder standing on a ledge with a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183953.jpg", "caption": "a woman standing on the deck of a boat looking out to sea", "annotations": [{"polygon": [[128, 196], [174, 202], [173, 246], [118, 255]], "text": "ADI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ADU", "recog_valid": false, "glyph_recog_text": "台", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577176.jpg", "caption": "four pictures of different types of pastries", "annotations": [{"polygon": [[77, 186], [83, 191], [173, 199], [182, 180], [147, 173], [123, 164], [87, 164], [83, 177], [86, 180]], "text": "Thelins", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Shelins", "recog_valid": false, "glyph_recog_text": "Thelins", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052897.jpg", "caption": "two women walking on a tennis court", "annotations": [{"polygon": [[427, 135], [427, 135], [422, 157], [393, 167], [384, 139]], "text": "Ciel", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ciel", "recog_valid": true, "glyph_recog_text": "Ciel", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183980.jpg", "caption": "a computer keyboard and mouse on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052924.jpg", "caption": "a white and red motorcycle is on display at a show", "annotations": [{"polygon": [[-1, 144], [0, 152], [35, 125], [35, 118]], "text": "DAVIDSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "E-Dsa", "recog_valid": false, "glyph_recog_text": "PAPCHU", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052943.jpg", "caption": "a group of people playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184037.jpg", "caption": "a traffic light with a sign has texts", "annotations": [{"polygon": [[327, 327], [325, 358], [367, 358], [371, 341], [367, 329]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "30", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052978.jpg", "caption": "a small store with a bench in front of it", "annotations": [{"polygon": [[258, 93], [376, 103], [376, 123], [255, 123]], "text": "Spigadoro", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Spigadoro", "recog_valid": true, "glyph_recog_text": "Spigadoro", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577270.jpg", "caption": "a palm tree in front of a building with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052983.jpg", "caption": "a group of people standing on the side of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315156.jpg", "caption": "a cat is sitting in a bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446231.jpg", "caption": "a group of people standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315165.jpg", "caption": "a small boat in the water at night with a large ship in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315174.jpg", "caption": "a collage of pictures of food being prepared", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184109.jpg", "caption": "a group of people at a fruit stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577333.jpg", "caption": "three teddy bears sitting on a table with a graduation cap", "annotations": [{"polygon": [[477, 341], [508, 367], [510, 357], [488, 325], [478, 332]], "text": "Harrods", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Harnee", "recog_valid": false, "glyph_recog_text": "Harrod", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577343.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577351.jpg", "caption": "a man in uniform standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184138.jpg", "caption": "a man and woman working on a car", "annotations": [{"polygon": [[230, 9], [231, 9], [382, 4], [384, 71], [225, 77]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315211.jpg", "caption": "a neon sign on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315218.jpg", "caption": "a young girl in a bunny costume holding two stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184155.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[92, 162], [369, 145], [376, 253], [95, 267]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[201, 377], [236, 374], [234, 403], [205, 404]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "(ALL", "recog_valid": false, "glyph_recog_text": "ALL", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446307.jpg", "caption": "a train pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053111.jpg", "caption": "a delta airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446328.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577411.jpg", "caption": "a man in a wet suit holding a surfboard on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184210.jpg", "caption": "a stop sign and traffic light on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053150.jpg", "caption": "a man walking an elephant down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446372.jpg", "caption": "a giraffe in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053184.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315339.jpg", "caption": "a woman in a purple dress is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315348.jpg", "caption": "a long exposure photograph of a freeway at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184282.jpg", "caption": "a train on the tracks with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315383.jpg", "caption": "a dog is eating a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446467.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315404.jpg", "caption": "a truck with a skull and crossbones on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577556.jpg", "caption": "a man and woman cutting a cake together", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577563.jpg", "caption": "air transat airbus a320-214-r nr 7058 at london london london london london", "annotations": [{"polygon": [[391, 221], [317, 245], [317, 228], [320, 221], [326, 217], [328, 221], [386, 202], [396, 195], [398, 202]], "text": "transat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "trau1bat", "recog_valid": false, "glyph_recog_text": "transat", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[287, 234], [307, 224], [316, 227], [306, 248], [281, 254]], "text": "air", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ak", "recog_valid": false, "glyph_recog_text": "alr", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577583.jpg", "caption": "a couple sleeping in bed next to a clock", "annotations": [{"polygon": [[356, 263], [409, 281], [396, 310], [346, 289]], "text": "300", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "300", "recog_valid": true, "glyph_recog_text": "300", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446521.jpg", "caption": "a man walks past the sign for st bernard's hospital", "annotations": [{"polygon": [[88, 315], [165, 296], [166, 308], [90, 328]], "text": "ST.BRIDE'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ST. BRIDE'S", "recog_valid": false, "glyph_recog_text": "ST.ARIPES", "glyph_recog_ld": 0.6363639669418482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446531.jpg", "caption": "a red and silver fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577604.jpg", "caption": "a toilet in a bathroom with a sign on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446533.jpg", "caption": "a snowboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446536.jpg", "caption": "a window display with a rainbow umbrella and a sign that says university of south carolina", "annotations": [{"polygon": [[126, 186], [141, 208], [153, 201], [166, 188], [190, 175], [216, 165], [210, 140], [196, 142], [174, 151], [157, 161], [141, 173], [128, 181]], "text": "SUSSEX BOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S0SSEX", "recog_valid": false, "glyph_recog_text": "SUSSEXBOO", "glyph_recog_ld": 0.5555560493821674}, {"polygon": [[223, 138], [247, 141], [275, 147], [301, 163], [326, 190], [329, 194], [313, 211], [304, 203], [305, 195], [296, 183], [274, 172], [256, 163], [230, 163], [222, 162], [220, 143]], "text": "UNIVERSITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNIVERSIT!", "recog_valid": false, "glyph_recog_text": "UNIVERSITY", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[113, 403], [112, 415], [161, 404], [156, 393], [136, 383], [118, 392]], "text": "CHOCOLATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "67", "recog_valid": false, "glyph_recog_text": "PeOoOLueE", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184397.jpg", "caption": "a group of police motorcycles parked on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446557.jpg", "caption": "a young girl holding a baseball mitt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053347.jpg", "caption": "a woman cutting a cake", "annotations": [{"polygon": [[188, 374], [188, 374], [212, 362], [200, 334], [200, 334], [172, 344], [172, 344], [186, 375]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "套", "recog_valid": false, "glyph_recog_text": "g", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[246, 362], [245, 362], [264, 353], [238, 313], [215, 320]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hpputh", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315508.jpg", "caption": "a stop sign on a street corner with a red truck", "annotations": [{"polygon": [[377, 140], [444, 136], [444, 106], [378, 113]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053370.jpg", "caption": "a group of motorcycles parked in a gravel lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315521.jpg", "caption": "a pile of apples in bags with a label on them", "annotations": [{"polygon": [[201, 112], [212, 125], [204, 130], [170, 147], [166, 149], [156, 125], [177, 118], [196, 112]], "text": "SKI-HI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IHIXS", "recog_valid": false, "glyph_recog_text": "SKI HI", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[344, 105], [350, 121], [336, 120], [330, 120], [321, 123], [310, 138], [303, 132], [302, 122], [314, 113], [324, 106], [332, 105], [338, 105]], "text": "SKI-HI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "1", "recog_valid": false, "glyph_recog_text": "SKI-HI", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[31, 353], [35, 372], [44, 369], [52, 370], [73, 380], [86, 355], [51, 349], [41, 349]], "text": "APP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ddV", "recog_valid": false, "glyph_recog_text": "APP", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[141, 295], [144, 310], [155, 305], [173, 302], [186, 305], [191, 300], [190, 276], [165, 276], [154, 280]], "text": "SKH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1US", "recog_valid": false, "glyph_recog_text": "SKH", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[246, 346], [245, 357], [261, 352], [275, 345], [283, 345], [297, 346], [300, 328], [288, 326], [271, 326], [252, 336]], "text": "APPLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "anddy", "recog_valid": false, "glyph_recog_text": "APPLES", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[350, 347], [355, 366], [357, 369], [376, 362], [387, 354], [420, 346], [422, 326], [392, 331]], "text": "APPLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S37ddV", "recog_valid": false, "glyph_recog_text": "APPLES", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[341, 266], [345, 287], [402, 276], [404, 272], [396, 256]], "text": "SKI-HI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IHIXS", "recog_valid": false, "glyph_recog_text": "SKI-HI", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[2, 177], [2, 191], [43, 216], [49, 203], [44, 193], [20, 178]], "text": "APPLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "1ldoy", "recog_valid": false, "glyph_recog_text": "APPLE", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[9, 126], [9, 144], [48, 157], [50, 148], [37, 135], [19, 126]], "text": "HI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HU", "recog_valid": false, "glyph_recog_text": "H I", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053385.jpg", "caption": "a black and white photo of a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577685.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446618.jpg", "caption": "a stop sign is on the side of the railroad tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184477.jpg", "caption": "a yellow truck with a large crane on it", "annotations": [{"polygon": [[246, 229], [248, 239], [204, 279], [200, 268]], "text": "HEWDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEWDEA", "recog_valid": false, "glyph_recog_text": "HEWOEN", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446646.jpg", "caption": "a street sign has texts written in english", "annotations": [{"polygon": [[108, 306], [242, 274], [243, 294], [108, 326]], "text": "YUKANYHORN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YUKANTHORNO", "recog_valid": false, "glyph_recog_text": "YUKANYHORN", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[424, 323], [426, 340], [482, 361], [480, 344]], "text": "VANN NO 178", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "VAK MO1 T8", "recog_valid": false, "glyph_recog_text": "VANN NO ETE", "glyph_recog_ld": 0.45454595041277235}, {"polygon": [[371, 299], [370, 318], [424, 338], [422, 321]], "text": "GMAKHAK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MAKHAK", "recog_valid": false, "glyph_recog_text": "GMAKHAK", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577723.jpg", "caption": "a clock on a building that says first national bank", "annotations": [{"polygon": [[229, 284], [295, 287], [294, 317], [228, 314]], "text": "BANK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BANK", "recog_valid": true, "glyph_recog_text": "BANK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446655.jpg", "caption": "a white bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446670.jpg", "caption": "a table with four chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577748.jpg", "caption": "a street sign on a pole in front of a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446677.jpg", "caption": "a man in a suit holding a sheep in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053464.jpg", "caption": "a man cutting a cake", "annotations": [{"polygon": [[17, 413], [19, 410], [22, 410], [27, 413], [28, 415], [39, 410], [58, 402], [75, 395], [74, 394], [75, 392], [76, 392], [77, 394], [89, 389], [89, 386], [90, 385], [93, 387], [93, 388], [99, 385], [102, 382], [102, 382], [102, 379], [103, 378], [105, 379], [105, 380], [108, 378], [110, 381], [120, 375], [125, 373], [130, 373], [131, 376], [131, 379], [61, 409], [63, 411], [63, 413], [56, 416], [55, 414], [55, 411], [37, 420], [31, 423], [28, 427], [24, 428], [20, 427], [18, 424], [16, 423], [15, 418], [16, 416]], "text": "congratulations", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Cnuanaruetieao", "recog_valid": false, "glyph_recog_text": "congratulations", "glyph_recog_ld": 0.2666671555552296}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577758.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053479.jpg", "caption": "a cat sitting on top of a laptop computer", "annotations": [{"polygon": [[327, 386], [330, 400], [377, 383], [375, 370]], "text": "BRITAIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BRITAIN", "recog_valid": true, "glyph_recog_text": "BRITAIN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184556.jpg", "caption": "a row of bicycles parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315663.jpg", "caption": "a black cat sleeping on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053533.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053539.jpg", "caption": "a man sitting on a bed with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577835.jpg", "caption": "a large orange truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315719.jpg", "caption": "a woman riding a bicycle on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315728.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184669.jpg", "caption": "a coca cola truck driving down the street", "annotations": [{"polygon": [[287, 261], [252, 262], [252, 226], [288, 232]], "text": "CocaCola", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "lunhe", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446815.jpg", "caption": "a man walking a horse on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446813.jpg", "caption": "a vintage photo of a street with a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446818.jpg", "caption": "a person skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315751.jpg", "caption": "three people standing in the snow holding skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446827.jpg", "caption": "a woman is standing next to a colorful tuk tuk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446832.jpg", "caption": "a stop sign with a bunch of stickers on it", "annotations": [{"polygon": [[186, 61], [224, 72], [230, 41], [187, 30]], "text": "NW", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "NW", "recog_valid": true, "glyph_recog_text": "NW", "glyph_recog_ld": 1.0}, {"polygon": [[349, 104], [347, 137], [310, 145], [311, 112]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[233, 177], [240, 178], [281, 163], [280, 112], [233, 132]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[202, 358], [202, 392], [137, 381], [147, 349]], "text": "ATM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ATM", "recog_valid": true, "glyph_recog_text": "ATM", "glyph_recog_ld": 1.0}, {"polygon": [[282, 463], [282, 497], [317, 501], [313, 468]], "text": "DR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DR", "recog_valid": true, "glyph_recog_text": "DR", "glyph_recog_ld": 1.0}, {"polygon": [[285, 349], [284, 394], [333, 409], [369, 409], [367, 377], [337, 357]], "text": "CROOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROOK", "recog_valid": true, "glyph_recog_text": "CROOK", "glyph_recog_ld": 1.0}, {"polygon": [[204, 360], [205, 379], [245, 396], [257, 360]], "text": "DaDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DRD", "recog_valid": false, "glyph_recog_text": "DaDE", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446834.jpg", "caption": "a man holding a hot dog in his hand", "annotations": [{"polygon": [[228, 388], [243, 384], [256, 386], [279, 386], [295, 386], [310, 385], [314, 412], [274, 415], [263, 413], [253, 414], [249, 412], [247, 409], [235, 412]], "text": "FREE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FEE", "recog_valid": false, "glyph_recog_text": "FREE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315768.jpg", "caption": "a birthday cake with a lion on it", "annotations": [{"polygon": [[276, 319], [299, 332], [312, 310], [319, 295], [323, 286], [311, 283], [303, 283], [277, 318]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bithieg", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577925.jpg", "caption": "a man and woman walking down a street with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184709.jpg", "caption": "a street sign with no parking signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053644.jpg", "caption": "a man holding a cupcake in front of a poster", "annotations": [{"polygon": [[303, 164], [308, 188], [375, 179], [376, 158]], "text": "donut", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "donut", "recog_valid": true, "glyph_recog_text": "donut", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446864.jpg", "caption": "a bowl of strawberries, carrots and a bowl of dip", "annotations": [{"polygon": [[385, 409], [417, 417], [410, 430], [360, 444]], "text": "fedore", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "hilm", "recog_valid": false, "glyph_recog_text": "fedore", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[410, 441], [427, 411], [497, 412], [485, 440]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": " photggrorly", "recog_valid": false, "glyph_recog_text": "Shotography", "glyph_recog_ld": 0.5000004166663194}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053662.jpg", "caption": "a giraffe is standing in a zoo enclosure", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315808.jpg", "caption": "a train traveling down the tracks near a stadium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446882.jpg", "caption": "an f-4 phantom fighter jet flying over the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053672.jpg", "caption": "a soccer game with a referee and players in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184751.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446899.jpg", "caption": "a crowd of people standing around a truck with a sign", "annotations": [{"polygon": [[409, 121], [476, 145], [477, 127], [450, 117], [433, 109], [419, 103], [407, 97], [404, 102], [404, 109], [405, 116], [406, 119]], "text": "GUARDIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "GUATIAT", "recog_valid": false, "glyph_recog_text": "GUARDIAN", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[282, 184], [282, 202], [308, 203], [312, 209], [339, 211], [343, 176]], "text": "Shargins", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hopgius", "recog_valid": false, "glyph_recog_text": "Shargins", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446901.jpg", "caption": "a group of people walking on the beach with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315830.jpg", "caption": "a large clock on a pole in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577975.jpg", "caption": "a woman in dressage gear riding a horse", "annotations": [{"polygon": [[350, 291], [352, 345], [414, 346], [423, 290]], "text": "IRT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IRT", "recog_valid": true, "glyph_recog_text": "IRT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053696.jpg", "caption": "a motorcycle with a large wheel on top of it", "annotations": [{"polygon": [[396, 36], [392, 63], [426, 90], [428, 58]], "text": "SIEME", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "SENEI", "recog_valid": false, "glyph_recog_text": "SIEME", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578002.jpg", "caption": "a person holding a wine glass", "annotations": [{"polygon": [[272, 191], [272, 157], [335, 156], [358, 151], [362, 181], [334, 188], [302, 190]], "text": "afto", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "afton", "recog_valid": false, "glyph_recog_text": "afto", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[405, 120], [405, 136], [449, 119], [446, 103], [425, 110]], "text": "afto", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "aftcl", "recog_valid": false, "glyph_recog_text": "afto", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578009.jpg", "caption": "a suitcase with clothes and other items inside", "annotations": [{"polygon": [[261, 266], [292, 261], [323, 251], [325, 268], [294, 277], [262, 283]], "text": "Packing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Packing", "recog_valid": true, "glyph_recog_text": "Packing", "glyph_recog_ld": 1.0}, {"polygon": [[373, 223], [392, 216], [410, 209], [430, 201], [442, 198], [443, 213], [429, 218], [398, 231], [374, 240]], "text": "Luggage", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Luggage", "recog_valid": true, "glyph_recog_text": "Luggnge", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[372, 260], [406, 243], [408, 261], [373, 279]], "text": "right?", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "right", "recog_valid": false, "glyph_recog_text": "right?", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[312, 290], [361, 269], [362, 285], [314, 308]], "text": "empty", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "empty", "recog_valid": true, "glyph_recog_text": "empty", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315873.jpg", "caption": "a row of blue buses parked in a lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053729.jpg", "caption": "a group of people standing on a beach with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578023.jpg", "caption": "a baseball player is standing on a field with a bat", "annotations": [{"polygon": [[297, 170], [297, 170], [300, 166], [322, 163], [324, 165], [324, 175], [324, 183], [326, 188], [329, 196], [327, 200], [308, 205], [299, 204]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "二", "recog_valid": false, "glyph_recog_text": "-(", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184822.jpg", "caption": "a white bus with a red and brown stripe", "annotations": [{"polygon": [[0, 304], [-1, 331], [84, 335], [132, 326], [138, 298]], "text": "TOLINJAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TOLINJAT", "recog_valid": true, "glyph_recog_text": "TOLINJAT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578070.jpg", "caption": "a bus with children standing in front of it", "annotations": [{"polygon": [[215, 241], [248, 232], [258, 231], [260, 234], [259, 241], [258, 244], [260, 249], [259, 255], [212, 264], [214, 241]], "text": "MEMBER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MEMBER", "recog_valid": true, "glyph_recog_text": "MEMBER", "glyph_recog_ld": 1.0}, {"polygon": [[265, 228], [309, 219], [310, 222], [307, 233], [307, 247], [267, 254], [267, 235], [265, 235]], "text": "TODAY!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TODAY", "recog_valid": false, "glyph_recog_text": "TODAY!", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[355, 288], [460, 281], [467, 285], [468, 289], [466, 295], [468, 303], [467, 308], [462, 312], [358, 313], [358, 307], [360, 306], [360, 299], [355, 300]], "text": "TIGERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "TIGERS", "recog_valid": true, "glyph_recog_text": "TIGERS", "glyph_recog_ld": 1.0}, {"polygon": [[232, 205], [231, 232], [282, 221], [281, 190]], "text": "BOARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "BOARD", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[180, 219], [177, 244], [208, 240], [207, 214]], "text": "JUMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "02", "recog_valid": false, "glyph_recog_text": "JUMP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053793.jpg", "caption": "a group of people sitting at a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184866.jpg", "caption": "a woman laying on the toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315944.jpg", "caption": "two puppies are in a bowl with food in it", "annotations": [{"polygon": [[83, 392], [74, 411], [129, 424], [133, 416]], "text": "paws", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "paws", "recog_valid": true, "glyph_recog_text": "paws", "glyph_recog_ld": 1.0}, {"polygon": [[34, 351], [26, 369], [65, 403], [81, 387]], "text": "Dirty", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Dirty", "recog_valid": true, "glyph_recog_text": "Dirty", "glyph_recog_ld": 1.0}, {"polygon": [[338, 382], [347, 400], [387, 372], [374, 357]], "text": "food!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "food", "recog_valid": false, "glyph_recog_text": "food!", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[300, 408], [308, 419], [339, 404], [334, 389]], "text": "wet", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "wet", "recog_valid": true, "glyph_recog_text": "欧et", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184877.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184889.jpg", "caption": "a group of people sitting on the ground near a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053825.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578114.jpg", "caption": "a table with scissors, tape, and a tube of glue", "annotations": [{"polygon": [[46, 217], [-1, 233], [-1, 248], [59, 227]], "text": "OER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AOFP", "recog_valid": false, "glyph_recog_text": "OER", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[48, 197], [-1, 214], [0, 228], [64, 206]], "text": "KOOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KOOL", "recog_valid": true, "glyph_recog_text": "KOOL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447044.jpg", "caption": "a cow and two crows on a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578119.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[86, 281], [84, 314], [201, 314], [201, 280]], "text": "ADER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ADER", "recog_valid": true, "glyph_recog_text": "ADER", "glyph_recog_ld": 1.0}, {"polygon": [[413, 12], [413, 12], [416, 55], [271, 58], [267, 14]], "text": "YPF", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "YPF", "recog_valid": true, "glyph_recog_text": "YPF", "glyph_recog_ld": 1.0}, {"polygon": [[331, 251], [332, 297], [424, 296], [424, 250]], "text": "MEDI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MEDI", "recog_valid": true, "glyph_recog_text": "MEDI", "glyph_recog_ld": 1.0}, {"polygon": [[161, 245], [164, 280], [87, 282], [85, 247]], "text": "OSBA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OSBA", "recog_valid": true, "glyph_recog_text": "OSBA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184905.jpg", "caption": "a red and white bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315994.jpg", "caption": "a purse, a camera, a cell phone, a pen, a notebook, a pen holder, a camera, a pen, a notebook, a pen holder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184924.jpg", "caption": "a woman riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316006.jpg", "caption": "a cake with candles on it", "annotations": [{"polygon": [[227, 81], [257, 40], [296, 95], [279, 116], [238, 130]], "text": "H", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "H", "recog_valid": true, "glyph_recog_text": "工", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316012.jpg", "caption": "two men in red and white soccer uniforms are playing a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447091.jpg", "caption": "a neon sign with a clock on it", "annotations": [{"polygon": [[264, 336], [264, 336], [302, 310], [314, 330], [280, 353]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TYE", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[310, 305], [310, 305], [408, 244], [429, 256], [323, 326]], "text": "ORIGINAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORIGINAL", "recog_valid": true, "glyph_recog_text": "ORIGINAL", "glyph_recog_ld": 1.0}, {"polygon": [[402, 396], [400, 428], [475, 427], [474, 390]], "text": "COLD", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "COLD", "recog_valid": true, "glyph_recog_text": "COLD", "glyph_recog_ld": 1.0}, {"polygon": [[104, 218], [107, 236], [134, 248], [138, 229]], "text": "Young", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Youn", "recog_valid": false, "glyph_recog_text": "Young", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[203, 198], [197, 340], [507, 190], [493, 118]], "text": "Philippe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Plhigiee", "recog_valid": false, "glyph_recog_text": "Philippe", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184965.jpg", "caption": "a street sign that says elephant and castle walworth", "annotations": [{"polygon": [[105, 134], [106, 102], [136, 104], [134, 134]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}, {"polygon": [[143, 100], [142, 132], [259, 132], [260, 101]], "text": "3204", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "3204", "recog_valid": true, "glyph_recog_text": "3204", "glyph_recog_ld": 1.0}, {"polygon": [[119, 207], [120, 251], [314, 246], [310, 202]], "text": "Elephant", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Elephant", "recog_valid": true, "glyph_recog_text": "Elephant", "glyph_recog_ld": 1.0}, {"polygon": [[170, 250], [166, 288], [310, 288], [310, 254]], "text": "Castle", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Castle", "recog_valid": true, "glyph_recog_text": "Castle", "glyph_recog_ld": 1.0}, {"polygon": [[116, 308], [117, 347], [329, 345], [326, 308]], "text": "Wlworth", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Walworth", "recog_valid": false, "glyph_recog_text": "Wlworth", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447118.jpg", "caption": "two men in orange and white uniforms playing basketball", "annotations": [{"polygon": [[397, 147], [392, 174], [426, 179], [425, 149]], "text": "EC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ECI", "recog_valid": false, "glyph_recog_text": "EC", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[120, 321], [128, 310], [149, 302], [156, 329], [129, 337]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}, {"polygon": [[317, 347], [346, 334], [360, 347], [331, 371]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 1.0}, {"polygon": [[344, 167], [328, 181], [326, 195], [378, 197], [379, 183], [371, 175], [350, 172]], "text": "Clarks", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Claus", "recog_valid": false, "glyph_recog_text": "Clarks", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578193.jpg", "caption": "a traffic light on a pole with a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184980.jpg", "caption": "a yellow and white trolley bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447135.jpg", "caption": "a yellow volkswagen beetle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185002.jpg", "caption": "a street with traffic lights and a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316075.jpg", "caption": "a black and white photo of a man playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578231.jpg", "caption": "a person cutting an apple on a cutting board", "annotations": [{"polygon": [[426, 403], [436, 412], [459, 406], [481, 397], [506, 385], [492, 378], [471, 388], [448, 396]], "text": "OUTDOOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "MOOOIRO", "recog_valid": false, "glyph_recog_text": "OUTDOOR", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578232.jpg", "caption": "fedex air cargo plane taking off in the sky", "annotations": [{"polygon": [[158, 204], [155, 229], [229, 258], [234, 234]], "text": "FedEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FedEx", "recog_valid": true, "glyph_recog_text": "FedEx", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578233.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185025.jpg", "caption": "a baseball game with a coca cola balloon in the sky", "annotations": [{"polygon": [[331, 123], [342, 126], [352, 135], [358, 159], [366, 159], [379, 155], [378, 149], [371, 127], [352, 112]], "text": "CoCo Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "p6oh", "recog_valid": false, "glyph_recog_text": "Coco cdia", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316109.jpg", "caption": "a boat with pink flags on it is traveling down the river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316113.jpg", "caption": "a man riding a wave on a surfboard", "annotations": [{"polygon": [[142, 100], [245, 109], [231, 134], [134, 124]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Chetegrophy", "recog_valid": false, "glyph_recog_text": "Photography", "glyph_recog_ld": 0.6363639669418482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053975.jpg", "caption": "a cart with a lot of luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053977.jpg", "caption": "a yellow cart with a man sitting in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185051.jpg", "caption": "a table full of food and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447197.jpg", "caption": "a little girl wearing a crown", "annotations": [{"polygon": [[241, 181], [305, 173], [308, 193], [246, 205]], "text": "Party", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pafty", "recog_valid": false, "glyph_recog_text": "Party", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[275, 198], [308, 184], [314, 202], [305, 211], [282, 216]], "text": "Tim", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "T品", "recog_valid": false, "glyph_recog_text": "Tim", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185080.jpg", "caption": "a man in a baseball uniform holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054012.jpg", "caption": "a group of people on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185082.jpg", "caption": "a black and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185087.jpg", "caption": "a woman is standing next to a plane", "annotations": [{"polygon": [[418, 98], [394, 120], [414, 140], [434, 120]], "text": "Continental", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "lOoa", "recog_valid": false, "glyph_recog_text": "Tnlaea", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578306.jpg", "caption": "a large airplane sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578307.jpg", "caption": "a woman in a red dress holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185108.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578326.jpg", "caption": "a man and a woman are cooking in an oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316190.jpg", "caption": "a bathroom mirror with a gold frame and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316194.jpg", "caption": "a red double decker bus", "annotations": [{"polygon": [[258, 121], [261, 167], [419, 192], [422, 158]], "text": "FAIRHAVEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FAIRHAVEN", "recog_valid": true, "glyph_recog_text": "FAIRHAVEN", "glyph_recog_ld": 1.0}, {"polygon": [[427, 162], [435, 194], [468, 199], [491, 189], [490, 172]], "text": "VILLAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "VIILAO", "recog_valid": false, "glyph_recog_text": "VILLAGE", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054065.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316214.jpg", "caption": "a man riding a wave on a surfboard in the ocean", "annotations": [{"polygon": [[412, 387], [415, 397], [405, 409], [419, 411], [430, 417], [434, 412], [443, 397], [437, 388], [431, 396], [423, 397], [421, 388]], "text": "Jayde", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "stayde", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447292.jpg", "caption": "a group of young men and women posing for a photo on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316224.jpg", "caption": "a plate topped with strawberries and whipped cream", "annotations": [{"polygon": [[410, 157], [410, 157], [411, 164], [410, 169], [410, 173], [420, 187], [423, 186], [425, 188], [424, 191], [428, 194], [434, 189], [437, 192], [431, 197], [436, 202], [436, 204], [430, 209], [428, 209], [409, 191], [406, 193], [402, 190], [402, 187], [405, 187], [408, 189], [398, 175], [397, 170], [400, 166], [399, 162], [401, 157], [404, 154], [407, 154]], "text": "Bayside", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Bayside", "recog_valid": true, "glyph_recog_text": "Bayside", "glyph_recog_ld": 1.0}, {"polygon": [[393, 181], [393, 186], [393, 188], [396, 191], [392, 194], [393, 198], [396, 199], [397, 200], [401, 198], [403, 200], [408, 205], [405, 211], [409, 217], [413, 214], [414, 216], [411, 219], [411, 221], [409, 222], [407, 224], [407, 227], [404, 226], [395, 217], [391, 211], [384, 202], [381, 196], [380, 189], [385, 183], [391, 179]], "text": "Skillet", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Sallet", "recog_valid": false, "glyph_recog_text": "Skillet", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185166.jpg", "caption": "a black and white photo of a horse in a stable", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185168.jpg", "caption": "a crowd of people walking down a street with giraffes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578387.jpg", "caption": "a kitchen with a counter top and stools", "annotations": [{"polygon": [[108, 367], [106, 419], [194, 423], [194, 387]], "text": "HANSCOM", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "HANSCOM", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185193.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316269.jpg", "caption": "a banana in a white box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185200.jpg", "caption": "a table full of old cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447345.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316275.jpg", "caption": "a highway sign with a traffic sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054149.jpg", "caption": "a group of men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447376.jpg", "caption": "a person riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185233.jpg", "caption": "oracle team USA's catamaran in action during the oracle team usa team usa team usa team usa team usa team usa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447379.jpg", "caption": "a man riding a motorcycle on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185247.jpg", "caption": "a man pushing a luggage cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578466.jpg", "caption": "a man and a child running on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578482.jpg", "caption": "a rusted old truck and a red trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054205.jpg", "caption": "a man in a blue shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316353.jpg", "caption": "a person's feet are in front of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054218.jpg", "caption": "a blender with a bottle of juice and a bottle of rum", "annotations": [{"polygon": [[159, 322], [156, 357], [168, 360], [217, 348], [236, 331], [236, 318], [195, 327]], "text": "Torans", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "loral", "recog_valid": false, "glyph_recog_text": "Torans", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578513.jpg", "caption": "a baseball player is swinging at a ball", "annotations": [{"polygon": [[279, 360], [281, 392], [359, 394], [356, 361]], "text": "Images", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Imenses", "recog_valid": false, "glyph_recog_text": "Images", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[163, 218], [157, 247], [188, 253], [195, 223]], "text": "19", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "19", "recog_valid": true, "glyph_recog_text": "19", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054228.jpg", "caption": "two men in suits on a television screen", "annotations": [{"polygon": [[231, 249], [243, 230], [258, 226], [270, 225], [288, 237], [295, 247], [296, 252], [317, 249], [314, 236], [306, 222], [285, 205], [271, 203], [255, 200], [248, 205], [227, 214], [222, 219], [212, 234], [208, 246], [218, 250], [229, 254]], "text": "HOCKEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "HOCKEY", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[239, 243], [225, 254], [223, 265], [230, 273], [236, 274], [250, 270], [296, 259], [295, 254], [285, 238], [278, 255], [252, 262], [246, 264], [245, 250]], "text": "Canada", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAY", "recog_valid": false, "glyph_recog_text": "Canada", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[228, 275], [241, 284], [251, 286], [264, 290], [273, 289], [271, 314], [255, 312], [236, 306], [218, 295]], "text": "IG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7e", "recog_valid": false, "glyph_recog_text": "IG", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054233.jpg", "caption": "a ferry boat is docked at night near a large stadium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447457.jpg", "caption": "people waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447479.jpg", "caption": "a large sign on the side of a building that says randy's donuts", "annotations": [{"polygon": [[232, 77], [253, 49], [272, 37], [286, 32], [325, 39], [353, 47], [369, 60], [385, 77], [393, 90], [401, 115], [403, 122], [380, 130], [369, 107], [357, 90], [337, 76], [318, 71], [297, 69], [278, 78], [256, 101]], "text": "RANDYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OANDYO", "recog_valid": false, "glyph_recog_text": "RANDYS", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[230, 239], [250, 214], [268, 234], [287, 243], [311, 250], [338, 250], [358, 250], [371, 237], [376, 236], [391, 261], [367, 275], [344, 280], [323, 282], [303, 279], [285, 274], [273, 270]], "text": "DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DONUTS", "recog_valid": true, "glyph_recog_text": "DONUTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447485.jpg", "caption": "a bottle of wine and two pastries sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316415.jpg", "caption": "a man standing in front of a boat in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578567.jpg", "caption": "a young boy pitching a baseball in a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578572.jpg", "caption": "a man sitting in front of a laptop with stickers on it", "annotations": [{"polygon": [[98, 342], [100, 356], [132, 354], [151, 346], [154, 334], [148, 324], [112, 332]], "text": "EBAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EAEOY", "recog_valid": false, "glyph_recog_text": "EBAY", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185358.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054294.jpg", "caption": "a woman in a kitchen with a cake on the counter", "annotations": [{"polygon": [[217, 294], [217, 294], [250, 287], [296, 289], [314, 293], [314, 298], [311, 330], [267, 326], [231, 330]], "text": "XAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "XAI", "recog_valid": false, "glyph_recog_text": "XAS", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185368.jpg", "caption": "a group of men standing around a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316441.jpg", "caption": "a van parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316447.jpg", "caption": "a street sign with a red arrow", "annotations": [{"polygon": [[343, 418], [339, 425], [333, 434], [324, 441], [334, 453], [345, 446], [352, 435], [358, 421]], "text": "19h", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "19h", "recog_valid": true, "glyph_recog_text": "19h", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054304.jpg", "caption": "a man eating a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578596.jpg", "caption": "a black and white photo of a store front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185394.jpg", "caption": "a man and a woman riding a horse on a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054345.jpg", "caption": "a red fire truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054351.jpg", "caption": "a man playing tennis on a court", "annotations": [{"polygon": [[358, 99], [374, 92], [356, 126], [344, 129]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185436.jpg", "caption": "a table with a bunch of fruits and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054375.jpg", "caption": "a group of men in red and blue uniforms cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316528.jpg", "caption": "an old photo of a horse and carriage with a man standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447607.jpg", "caption": "a traffic light on a street corner with a house in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054402.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185486.jpg", "caption": "a young boy holding a bat and a green toothbrush", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316561.jpg", "caption": "a desk with a clock, a cup of coffee, and papers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054425.jpg", "caption": "a man and a woman eating a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316571.jpg", "caption": "a man is crossing the street in front of a bus", "annotations": [{"polygon": [[8, 85], [63, 110], [69, 130], [0, 105], [0, 85]], "text": "ISEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ISEST", "recog_valid": true, "glyph_recog_text": "ISEST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185502.jpg", "caption": "a yellow school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185514.jpg", "caption": "a train is parked at a train station", "annotations": [{"polygon": [[308, 229], [332, 229], [322, 337], [288, 319]], "text": "first", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "之三", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[330, 274], [365, 287], [361, 331], [327, 313]], "text": "class", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "喜", "recog_valid": false, "glyph_recog_text": "o一", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316595.jpg", "caption": "a man and a young girl skateboarding on a ramp", "annotations": [{"polygon": [[487, 406], [478, 367], [325, 367], [342, 402]], "text": "m", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "BEOM", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578748.jpg", "caption": "a traffic light with a picture of a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316612.jpg", "caption": "great blue heron in the winter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185547.jpg", "caption": "a man doing a trick on a motorcycle in a parking lot", "annotations": [{"polygon": [[361, 183], [367, 153], [512, 130], [512, 160]], "text": "Craigjones.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CraigjoneS.com", "recog_valid": false, "glyph_recog_text": "Craigjones.con", "glyph_recog_ld": 0.8571429591836006}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447694.jpg", "caption": "a skateboarder is doing a trick in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578775.jpg", "caption": "a laptop computer sitting on top of a wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316640.jpg", "caption": "an old world war ii plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578793.jpg", "caption": "a window display with a motorcycle and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316649.jpg", "caption": "a small airplane flying over a field with smoke coming out of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316657.jpg", "caption": "a group of people skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447731.jpg", "caption": "a motorcycle racer is going down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316660.jpg", "caption": "a cat sitting in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054521.jpg", "caption": "a cat standing in an open refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447741.jpg", "caption": "a man holding a cat", "annotations": [{"polygon": [[258, 362], [221, 365], [230, 396], [273, 395]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[293, 363], [257, 368], [272, 395], [309, 388]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "XAPPY", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578819.jpg", "caption": "a street sign with several signs on it", "annotations": [{"polygon": [[237, 311], [240, 349], [366, 327], [362, 290]], "text": "SELWYN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SELWYN", "recog_valid": true, "glyph_recog_text": "SELWYN", "glyph_recog_ld": 1.0}, {"polygon": [[374, 287], [377, 323], [424, 316], [420, 278]], "text": "RD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RD", "recog_valid": true, "glyph_recog_text": "RD", "glyph_recog_ld": 1.0}, {"polygon": [[240, 380], [242, 418], [443, 406], [440, 365]], "text": "BACKPACKERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BACKPACKERS", "recog_valid": true, "glyph_recog_text": "BACKPACKERS", "glyph_recog_ld": 1.0}, {"polygon": [[18, 120], [15, 158], [139, 199], [139, 162]], "text": "Williams", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Williams", "recog_valid": true, "glyph_recog_text": "Williams", "glyph_recog_ld": 1.0}, {"polygon": [[152, 168], [155, 203], [184, 213], [182, 175]], "text": "Rd", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "C", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[217, 269], [216, 298], [261, 307], [263, 287]], "text": "BAPTIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAPTIOT", "recog_valid": false, "glyph_recog_text": "BAPTIST", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[216, 194], [218, 223], [269, 241], [269, 220], [242, 205]], "text": "Williams", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Willams", "recog_valid": false, "glyph_recog_text": "Wilarms", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054541.jpg", "caption": "three bottles of wine are sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578841.jpg", "caption": "a man and three children standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316699.jpg", "caption": "a black and white photo of a farm with a man and a horse pulling a cart with a flock of chickens", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578843.jpg", "caption": "a wooden bench and table in a room with pictures on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185634.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[423, 297], [380, 321], [415, 362], [465, 339]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "U", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447785.jpg", "caption": "a group of people playing frisbee on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185652.jpg", "caption": "an old black and white photo of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447795.jpg", "caption": "a large yellow dump truck driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316727.jpg", "caption": "a man holding a box of cereal in his hand", "annotations": [{"polygon": [[295, 156], [349, 167], [348, 196], [305, 189]], "text": "NG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NG", "recog_valid": true, "glyph_recog_text": "NG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054586.jpg", "caption": "a person on skis standing next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578875.jpg", "caption": "spider giraffe", "annotations": [{"polygon": [[175, 464], [174, 511], [327, 509], [326, 464]], "text": "SPIDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SPIDER", "recog_valid": true, "glyph_recog_text": "SPIDER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447811.jpg", "caption": "a traffic light and two cameras attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578884.jpg", "caption": "a man standing next to a bus", "annotations": [{"polygon": [[385, 335], [383, 415], [441, 439], [442, 340], [386, 332]], "text": "193", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "色", "recog_valid": false, "glyph_recog_text": "-o", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578892.jpg", "caption": "a bar with a counter and a window", "annotations": [{"polygon": [[0, 167], [50, 163], [43, 104], [-1, 106]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "oc", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[67, 164], [76, 224], [115, 218], [106, 158]], "text": "&", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "69", "recog_valid": false, "glyph_recog_text": "心", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578915.jpg", "caption": "a kitchen with a stove, microwave, refrigerator and dishwasher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578918.jpg", "caption": "a woman holding a tennis racket", "annotations": [{"polygon": [[270, 228], [270, 228], [386, 218], [390, 272], [272, 282]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IBM", "recog_valid": true, "glyph_recog_text": "IBM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447863.jpg", "caption": "a group of children sitting at a table eating", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447865.jpg", "caption": "a desk with a box of scissors, a pen, and a cup of coffee", "annotations": [{"polygon": [[256, 369], [257, 393], [227, 419], [168, 426], [197, 377]], "text": "AF", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AF", "recog_valid": true, "glyph_recog_text": "AF", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054652.jpg", "caption": "a construction site with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316801.jpg", "caption": "a man on a skateboard doing a trick in the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316804.jpg", "caption": "a cat sitting on top of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578951.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578961.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316825.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185759.jpg", "caption": "a young boy in a red shirt and a baseball glove", "annotations": [{"polygon": [[202, 366], [202, 366], [230, 366], [251, 372], [284, 404], [183, 392]], "text": "Phill", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Phille", "recog_valid": false, "glyph_recog_text": "Phill", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054706.jpg", "caption": "a teddy bear wearing a grey shirt", "annotations": [{"polygon": [[59, 407], [57, 435], [84, 436], [126, 428], [155, 428], [156, 424], [142, 406], [124, 408]], "text": "Spark", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "spa", "recog_valid": false, "glyph_recog_text": "Spark", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[252, 343], [233, 372], [293, 374], [299, 363], [295, 358], [267, 337]], "text": "Eva", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Eva", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316855.jpg", "caption": "a table with many pastries and other food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054717.jpg", "caption": "two cats eating food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185792.jpg", "caption": "a young boy pulling a suitcase on a leash", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447941.jpg", "caption": "a group of men in suits cutting into a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316871.jpg", "caption": "a cat laying on top of a blue suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579016.jpg", "caption": "a cat laying on a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579023.jpg", "caption": "a street sign with a picture of a cat on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185818.jpg", "caption": "a man holding a remote control in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579035.jpg", "caption": "a street sign in front of a store", "annotations": [{"polygon": [[66, 183], [62, 198], [121, 181], [121, 168]], "text": "COACH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "COACH", "recog_valid": true, "glyph_recog_text": "COACH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054749.jpg", "caption": "a sign with chinese writing on it is on the side of the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447976.jpg", "caption": "a car driving down a street with a city skyline in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579053.jpg", "caption": "a wooden desk with magazines, books and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447983.jpg", "caption": "a dip sign on a street corner", "annotations": [{"polygon": [[227, 152], [279, 155], [279, 143], [296, 143], [297, 121], [230, 118]], "text": "DIP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DIP", "recog_valid": true, "glyph_recog_text": "DIP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579057.jpg", "caption": "a bowl of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185844.jpg", "caption": "a garbage truck is parked at a busy intersection", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185848.jpg", "caption": "a no parking sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185849.jpg", "caption": "two trains parked at a train station with the sun shining", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316934.jpg", "caption": "a row boat tied to a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054806.jpg", "caption": "a man sitting at a table with a pizza and a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185888.jpg", "caption": "a canadian air canada plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579145.jpg", "caption": "a woman holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054869.jpg", "caption": "a yellow bus in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579165.jpg", "caption": "a table with a tray of food and a bowl of soup", "annotations": [{"polygon": [[426, 214], [452, 231], [460, 227], [480, 219], [478, 190], [457, 177]], "text": "Oi", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "ō", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579179.jpg", "caption": "two soccer players are fighting for the ball", "annotations": [{"polygon": [[410, 170], [413, 167], [416, 164], [455, 167], [453, 210], [444, 214], [437, 214], [411, 211], [413, 176], [410, 174]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185965.jpg", "caption": "a baseball player throwing a pitch", "annotations": [{"polygon": [[-1, 225], [129, 227], [129, 204], [50, 204], [48, 199], [46, 194], [45, 191], [43, 189], [41, 188], [0, 186]], "text": "CO.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "cO.com", "recog_valid": false, "glyph_recog_text": "cO.com", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317061.jpg", "caption": "a man riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185991.jpg", "caption": "a woman is putting her luggage on a bus", "annotations": [{"polygon": [[356, 207], [353, 245], [406, 277], [414, 227]], "text": "POLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POOG", "recog_valid": false, "glyph_recog_text": "POLA", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448143.jpg", "caption": "a man in white playing tennis", "annotations": [{"polygon": [[321, 328], [327, 321], [336, 315], [349, 308], [361, 305], [378, 304], [396, 307], [412, 314], [420, 322], [420, 346], [414, 340], [401, 331], [387, 325], [370, 324], [354, 327], [346, 332], [334, 341]], "text": "HIGHLAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H", "recog_valid": false, "glyph_recog_text": "HIGHLAN", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054943.jpg", "caption": "a group of people sitting at a table in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186035.jpg", "caption": "three boys standing next to a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054966.jpg", "caption": "a row of motorcycles parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054989.jpg", "caption": "a table with a variety of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579291.jpg", "caption": "a view of a train station with a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186113.jpg", "caption": "a red bus driving down a street with a statue in the middle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186148.jpg", "caption": "a group of people standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448303.jpg", "caption": "a person is skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186196.jpg", "caption": "a man standing on the side of a street", "annotations": [{"polygon": [[304, 230], [302, 292], [414, 291], [415, 236], [359, 226]], "text": "NTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NTE", "recog_valid": true, "glyph_recog_text": "NTE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579414.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186201.jpg", "caption": "a japanese airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317274.jpg", "caption": "a herd of elephants standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055135.jpg", "caption": "a man in a chef hat preparing food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448351.jpg", "caption": "a white refrigerator with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186207.jpg", "caption": "a bed with red and gold embroidered pillows", "annotations": [{"polygon": [[339, 347], [342, 340], [335, 329], [330, 324], [327, 321], [342, 319], [355, 328], [360, 339], [357, 352], [344, 350]], "text": "KANUU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "(ANOY", "recog_valid": false, "glyph_recog_text": "KANUU", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[215, 313], [198, 345], [216, 357], [303, 363], [314, 357], [318, 317], [286, 310]], "text": "K", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SC", "recog_valid": false, "glyph_recog_text": "K", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[140, 184], [134, 206], [141, 214], [160, 212], [164, 206], [168, 186]], "text": "K", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "K", "recog_valid": true, "glyph_recog_text": "¥", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317286.jpg", "caption": "a stop sign with chinese writing on it", "annotations": [{"polygon": [[216, 97], [253, 57], [257, 78], [218, 116]], "text": "MUNRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MUNRO", "recog_valid": true, "glyph_recog_text": "MJNRO", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[213, 242], [302, 269], [302, 305], [210, 282]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186227.jpg", "caption": "a group of people standing on the street holding signs", "annotations": [{"polygon": [[354, 210], [400, 211], [401, 184], [356, 180]], "text": "FREE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FREE", "recog_valid": true, "glyph_recog_text": "FREE", "glyph_recog_ld": 1.0}, {"polygon": [[463, 212], [402, 213], [404, 183], [460, 186]], "text": "BURMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BURMA", "recog_valid": true, "glyph_recog_text": "BURMA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186265.jpg", "caption": "a piece of cake with the word stoff written on it", "annotations": [{"polygon": [[231, 261], [216, 197], [249, 188], [323, 175], [361, 170], [379, 203], [390, 233], [365, 235], [333, 237], [265, 250]], "text": "toff", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "stofe", "recog_valid": false, "glyph_recog_text": "toff", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579487.jpg", "caption": "a white truck with a red and blue logo", "annotations": [{"polygon": [[452, 276], [447, 308], [480, 327], [487, 302]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317349.jpg", "caption": "two girls sitting on a ledge", "annotations": [{"polygon": [[398, 105], [399, 365], [455, 364], [455, 104]], "text": "JUNGLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "JUNGLF", "recog_valid": false, "glyph_recog_text": "SDZ0J", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186280.jpg", "caption": "a kitchen with a sink, stove, and a trash can", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186298.jpg", "caption": "a truck carrying a large white object on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579520.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055232.jpg", "caption": "a group of people sitting at a table under umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186308.jpg", "caption": "a large white airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186315.jpg", "caption": "a black and white photo of people walking down the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448461.jpg", "caption": "a small airplane sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579539.jpg", "caption": "a chocolate cake with orange icing and a sign that says 60 years", "annotations": [{"polygon": [[199, 327], [280, 325], [287, 357], [201, 361]], "text": "Sixty", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sug", "recog_valid": false, "glyph_recog_text": "Sixty", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[166, 256], [354, 252], [356, 337], [159, 337]], "text": "Happy Birthday John Sixty", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "68", "recog_valid": false, "glyph_recog_text": " cans", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579541.jpg", "caption": "a toilet in a bathroom", "annotations": [{"polygon": [[63, 43], [63, 104], [204, 107], [199, 44]], "text": "Stevie", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Slevi", "recog_valid": false, "glyph_recog_text": "Stevie", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[64, 83], [64, 139], [203, 141], [201, 83]], "text": "Rays", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Rag", "recog_valid": false, "glyph_recog_text": "Rays", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[86, 155], [97, 134], [104, 140], [115, 144], [126, 145], [124, 172], [110, 170], [96, 164]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}, {"polygon": [[132, 147], [133, 171], [149, 169], [168, 163], [184, 151], [172, 132], [162, 141], [150, 147]], "text": "MUSIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MUSIC", "recog_valid": true, "glyph_recog_text": "MUSIC", "glyph_recog_ld": 1.0}, {"polygon": [[374, 101], [379, 23], [447, 29], [448, 118]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "兰", "recog_valid": false, "glyph_recog_text": "zo", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[337, 101], [328, 172], [362, 179], [403, 195], [446, 208], [450, 127]], "text": "SERV", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SERV", "recog_valid": true, "glyph_recog_text": "SERV", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317401.jpg", "caption": "a clock with skulls and bones around it", "annotations": [{"polygon": [[287, 157], [288, 186], [255, 186], [251, 156]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448475.jpg", "caption": "a bus stop with a bus and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186361.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "annotations": [{"polygon": [[72, 74], [73, 5], [373, 4], [371, 72]], "text": "HASE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HASE", "recog_valid": true, "glyph_recog_text": "HASE", "glyph_recog_ld": 1.0}, {"polygon": [[289, 318], [289, 283], [400, 281], [398, 317]], "text": "POLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLO", "recog_valid": true, "glyph_recog_text": "POLO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055290.jpg", "caption": "a stuffed bear with a purple ribbon", "annotations": [{"polygon": [[195, 285], [225, 255], [234, 262], [201, 293]], "text": "CONR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONR", "recog_valid": true, "glyph_recog_text": "CONR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055303.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055315.jpg", "caption": "a person riding a motorcycle on a track", "annotations": [{"polygon": [[227, 225], [219, 238], [234, 245], [266, 256], [268, 240]], "text": "Mobil", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mobil", "recog_valid": true, "glyph_recog_text": "Mobil", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317460.jpg", "caption": "two red double decker buses are driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448533.jpg", "caption": "a display of cuckoo clocks in a store window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317474.jpg", "caption": "a bunch of bananas in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317476.jpg", "caption": "a street with a sign has texts", "annotations": [{"polygon": [[269, 196], [263, 220], [318, 210], [319, 190]], "text": "yulla", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8lla", "recog_valid": false, "glyph_recog_text": "yulla", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448560.jpg", "caption": "a tennis player is playing on a court in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317495.jpg", "caption": "a man in a suit standing on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186442.jpg", "caption": "a black cat sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055385.jpg", "caption": "a group of men in red and blue uniforms standing around a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317532.jpg", "caption": "a stop sign with a car in the background", "annotations": [{"polygon": [[150, 171], [414, 181], [402, 309], [115, 276], [115, 276]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317534.jpg", "caption": "a stop sign and a street sign on a pole", "annotations": [{"polygon": [[197, 80], [267, 86], [269, 98], [267, 112], [198, 109]], "text": "BOARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BOARD", "recog_valid": true, "glyph_recog_text": "BOARD", "glyph_recog_ld": 1.0}, {"polygon": [[99, 95], [292, 93], [293, 130], [104, 132]], "text": "WASHBORN BOARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WASHBOIRE", "recog_valid": false, "glyph_recog_text": "WASHBORNBOARD", "glyph_recog_ld": 0.5384618934908512}, {"polygon": [[140, 299], [296, 300], [294, 363], [139, 363]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448609.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448616.jpg", "caption": "a person wearing a green shirt and a tie with a cat on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186476.jpg", "caption": "a group of people standing around a table with boxes of apples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055410.jpg", "caption": "a woman sitting at a baggage claim counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579700.jpg", "caption": "a black and white photo of a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448633.jpg", "caption": "a blue bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579699.jpg", "caption": "a train is pulling into a station at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055418.jpg", "caption": "a train sitting on the tracks next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055425.jpg", "caption": "a row of motorcycles parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448648.jpg", "caption": "a busy street with people on bikes and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579729.jpg", "caption": "a man riding a motorcycle with a person on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055445.jpg", "caption": "a book, a cell phone, and a wallet on a desk", "annotations": [{"polygon": [[29, 277], [35, 299], [120, 280], [123, 278], [114, 255], [69, 264], [53, 268]], "text": "NIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NIC", "recog_valid": false, "glyph_recog_text": "NIG", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[39, 332], [54, 356], [211, 319], [199, 298], [119, 310], [107, 311]], "text": "WATCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WATCH", "recog_valid": true, "glyph_recog_text": "WATCH", "glyph_recog_ld": 1.0}, {"polygon": [[432, 418], [438, 426], [473, 402], [471, 391]], "text": "CLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "TRIES", "recog_valid": false, "glyph_recog_text": "日1品3", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579752.jpg", "caption": "a vintage bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186544.jpg", "caption": "a young man in black shirt and white shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055484.jpg", "caption": "a black and white photo of sailboats in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448702.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186562.jpg", "caption": "a man riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448710.jpg", "caption": "a steam train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579785.jpg", "caption": "a young man and woman looking at a cell phone", "annotations": [{"polygon": [[278, 346], [366, 327], [364, 363], [279, 383]], "text": "Ultimate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "illiony", "recog_valid": false, "glyph_recog_text": "Ultimate", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448723.jpg", "caption": "a cat is sniffing a pizza box on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186585.jpg", "caption": "a laptop computer with a cell phone on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186599.jpg", "caption": "a green truck with a bed on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186606.jpg", "caption": "a little league baseball game with a batter and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186611.jpg", "caption": "a baseball game in progress with a pitcher throwing the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186615.jpg", "caption": "a busy street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448761.jpg", "caption": "a man is bending over a refrigerator in a kitchen", "annotations": [{"polygon": [[184, 104], [255, 115], [249, 160], [183, 155]], "text": "YAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YAY", "recog_valid": true, "glyph_recog_text": "YAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317690.jpg", "caption": "a tall clock tower with a dark sky behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317687.jpg", "caption": "a car is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055567.jpg", "caption": "a sign for the emerald dental centre in south africa", "annotations": [{"polygon": [[173, 77], [175, 110], [316, 114], [317, 90], [297, 80]], "text": "Ennerdale", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Ennerdale", "recog_valid": true, "glyph_recog_text": "Ennerdale", "glyph_recog_ld": 1.0}, {"polygon": [[281, 120], [282, 153], [377, 153], [378, 133], [339, 119]], "text": "Centre", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Centre", "recog_valid": true, "glyph_recog_text": "Centre", "glyph_recog_ld": 1.0}, {"polygon": [[-1, 110], [0, 142], [79, 145], [85, 122], [53, 110]], "text": "Medic", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Medic", "recog_valid": true, "glyph_recog_text": "Medic", "glyph_recog_ld": 1.0}, {"polygon": [[73, 261], [68, 288], [100, 296], [154, 296], [157, 288], [158, 268]], "text": "Agang", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Agang", "recog_valid": true, "glyph_recog_text": "Agang", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579862.jpg", "caption": "people riding bicycles on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186646.jpg", "caption": "a man is standing next to a blender", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055578.jpg", "caption": "a woman sitting at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579899.jpg", "caption": "a black and white photo of people standing on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579901.jpg", "caption": "a baseball player holding a bat in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579907.jpg", "caption": "a living room with two leather couches and a flat screen tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448842.jpg", "caption": "a street sign with a building in the background", "annotations": [{"polygon": [[135, 80], [170, 113], [169, 122], [131, 89]], "text": "Scaffold", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Scafoe", "recog_valid": false, "glyph_recog_text": "gcafrpie", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[128, 126], [180, 171], [182, 163], [163, 145], [161, 142], [131, 117]], "text": "Beauchamp", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Beaucarmo", "recog_valid": false, "glyph_recog_text": "Ee间icnomp", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186703.jpg", "caption": "a cupcake and a bottle of milk on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317781.jpg", "caption": "a dog sitting in a chair", "annotations": [{"polygon": [[381, 44], [374, 84], [414, 91], [421, 50]], "text": "JE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "JE", "recog_valid": true, "glyph_recog_text": "J", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055637.jpg", "caption": "a man riding a motorcycle down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055641.jpg", "caption": "a red and white train on a rainy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055642.jpg", "caption": "a black train engine sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448863.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055651.jpg", "caption": "two people are working in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055665.jpg", "caption": "a person is using a cell phone while driving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186740.jpg", "caption": "a large white airplane", "annotations": [{"polygon": [[316, 303], [360, 307], [351, 337], [308, 335]], "text": "jetBlue", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "jerBlue", "recog_valid": false, "glyph_recog_text": "jetBlue", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055681.jpg", "caption": "a man riding a horse down a street", "annotations": [{"polygon": [[394, 202], [386, 246], [497, 260], [502, 214]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000448911.jpg", "caption": "two trains are side by side with the number 3 on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186775.jpg", "caption": "a street with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186777.jpg", "caption": "a bench on a beach with a no trespassing sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579997.jpg", "caption": "a man playing ping pong", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186799.jpg", "caption": "a man in a boat with a basket of vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317905.jpg", "caption": "a desk with a computer monitor and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055764.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317911.jpg", "caption": "a man standing next to a bike with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055767.jpg", "caption": "three jockeys race horses down a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449004.jpg", "caption": "a semi truck is involved in a crash on the highway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317934.jpg", "caption": "a man in a hat and vest paddling a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449019.jpg", "caption": "a man is feeding a dog a piece of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449029.jpg", "caption": "a black and white photo of a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317986.jpg", "caption": "a young boy wearing a hat and a cup of coffee", "annotations": [{"polygon": [[336, 363], [335, 344], [384, 329], [393, 344]], "text": "DARIGOLD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DARIGOD", "recog_valid": false, "glyph_recog_text": "DARGOLD", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[299, 39], [303, 80], [335, 80], [335, 43]], "text": "Krisp", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "hbu", "recog_valid": false, "glyph_recog_text": "长", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[217, 58], [216, 18], [185, 37], [188, 78]], "text": "eme", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "图", "recog_valid": false, "glyph_recog_text": "o E", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055849.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[242, 126], [252, 125], [256, 135], [269, 133], [271, 124], [299, 126], [311, 115], [318, 118], [314, 126], [333, 125], [333, 135], [333, 142], [330, 148], [279, 160], [276, 163], [260, 169], [253, 174], [247, 167], [247, 161], [239, 156], [239, 137]], "text": "Orioles ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ceisdog", "recog_valid": false, "glyph_recog_text": "Orioles", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449066.jpg", "caption": "a traffic light and a sign that says australia", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317997.jpg", "caption": "two men standing on a tennis court", "annotations": [{"polygon": [[110, 5], [109, 59], [242, 62], [222, 6]], "text": "KIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "KIA", "recog_valid": true, "glyph_recog_text": "KIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580142.jpg", "caption": "a traffic light with a green light and a red light", "annotations": [{"polygon": [[193, 331], [226, 331], [227, 373], [191, 373]], "text": "16", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580146.jpg", "caption": "a snowboarder is doing a trick in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055859.jpg", "caption": "a young boy standing next to a fire hydrant", "annotations": [{"polygon": [[272, 218], [270, 226], [274, 235], [284, 246], [293, 252], [302, 257], [304, 258], [320, 255], [333, 254], [334, 251], [326, 251], [315, 250], [306, 252], [301, 250], [296, 248], [292, 246], [286, 242], [280, 235], [276, 226]], "text": "VARSITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "VARSITY", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449078.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[278, 204], [293, 199], [312, 189], [326, 186], [329, 210], [308, 217], [283, 224]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "30", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055874.jpg", "caption": "a woman holding a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580166.jpg", "caption": "a train traveling down a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055886.jpg", "caption": "a train traveling down a track with people standing on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449103.jpg", "caption": "people walking down the street with umbrellas", "annotations": [{"polygon": [[122, 116], [130, 84], [155, 85], [198, 106], [214, 118], [237, 132], [251, 139], [259, 156], [254, 179]], "text": "CASINO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CASINO", "recog_valid": true, "glyph_recog_text": "CASINO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449106.jpg", "caption": "a bed with a white and gray comforter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580191.jpg", "caption": "a man pushing a cart full of bananas down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318071.jpg", "caption": "stephen evans and his dog, the book that changed his life", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055947.jpg", "caption": "a group of people standing around a table eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449179.jpg", "caption": "girls soccer game", "annotations": [{"polygon": [[80, 158], [90, 154], [124, 159], [116, 202], [81, 196]], "text": "16", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "16", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318107.jpg", "caption": "a double decker bus with a large advertisement on the side", "annotations": [{"polygon": [[271, 196], [278, 229], [317, 231], [315, 206], [290, 206], [288, 197]], "text": "WICKED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NiXE", "recog_valid": false, "glyph_recog_text": "WCKED", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[329, 208], [330, 234], [372, 238], [371, 213]], "text": "LONDON'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONOONS", "recog_valid": false, "glyph_recog_text": "LONDONS", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318154.jpg", "caption": "a man eating a sandwich in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318164.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056023.jpg", "caption": "a group of people standing in the snow with snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449244.jpg", "caption": "a stop sign and a train crossing sign", "annotations": [{"polygon": [[229, 188], [229, 188], [138, 258], [120, 239], [217, 172], [230, 186]], "text": "SSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": ".SSING", "recog_valid": false, "glyph_recog_text": "SSING", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[92, 280], [94, 334], [205, 329], [219, 300], [222, 286], [212, 275], [94, 281]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[120, 168], [134, 157], [164, 196], [149, 206]], "text": "RA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RA", "recog_valid": true, "glyph_recog_text": "RA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187103.jpg", "caption": "a baseball player standing on a field", "annotations": [{"polygon": [[291, 226], [333, 227], [335, 263], [292, 262]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "30", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056032.jpg", "caption": "a plate with a slice of pizza and a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580334.jpg", "caption": "a giraffe eating a branch from a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449270.jpg", "caption": "a clock is hanging in the window of a store", "annotations": [{"polygon": [[370, 115], [382, 113], [393, 108], [403, 98], [412, 83], [416, 79], [424, 83], [421, 94], [410, 110], [391, 123], [374, 128]], "text": "THEATRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "THFATRI", "recog_valid": false, "glyph_recog_text": "THEATRE", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[322, 89], [331, 96], [336, 105], [346, 109], [341, 121], [327, 111], [313, 96]], "text": "RUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "RUE", "recog_valid": true, "glyph_recog_text": "RUE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187131.jpg", "caption": "a red bus is parked on the side of the road", "annotations": [{"polygon": [[158, 123], [184, 119], [175, 154], [155, 155], [147, 146]], "text": "407", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "字", "recog_valid": false, "glyph_recog_text": "Lot", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056065.jpg", "caption": "air canada boeing 767-300", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318220.jpg", "caption": "a silver airplane with a flag painted on the side", "annotations": [{"polygon": [[247, 315], [258, 284], [308, 279], [304, 308]], "text": "glory", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Goy", "recog_valid": false, "glyph_recog_text": "glory", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318241.jpg", "caption": "two men playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318247.jpg", "caption": "the bronco, downtown indianapolis, indiana, usa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056118.jpg", "caption": "a skateboarder doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318279.jpg", "caption": "a group of men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580466.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187253.jpg", "caption": "a man walking through an airport with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056193.jpg", "caption": "a boat with a window that shows the city and water", "annotations": [{"polygon": [[64, 171], [62, 189], [108, 173], [105, 154]], "text": "Accademia", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "decagemia", "recog_valid": false, "glyph_recog_text": "Accadernis", "glyph_recog_ld": 0.40000059999939996}, {"polygon": [[66, 343], [64, 361], [110, 377], [110, 358], [110, 355]], "text": "Accademia", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "kocademia", "recog_valid": false, "glyph_recog_text": "Ac adletia", "glyph_recog_ld": 0.5000004999995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187277.jpg", "caption": "a group of children posing for a photo on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449422.jpg", "caption": "a child's hand on a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056206.jpg", "caption": "a person holding a cup of coffee on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056261.jpg", "caption": "a woman walking with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580562.jpg", "caption": "a large dump truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580579.jpg", "caption": "a box of doughnuts", "annotations": [{"polygon": [[140, 351], [186, 368], [187, 355], [140, 338]], "text": "FUND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FUND", "recog_valid": true, "glyph_recog_text": "FURD", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[133, 363], [132, 375], [79, 352], [82, 342]], "text": "CONTACT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CONTACT", "recog_valid": true, "glyph_recog_text": "CONTACT", "glyph_recog_ld": 1.0}, {"polygon": [[174, 395], [134, 377], [134, 364], [172, 382]], "text": "YOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOUR", "recog_valid": true, "glyph_recog_text": "YOUR", "glyph_recog_ld": 1.0}, {"polygon": [[225, 417], [176, 396], [175, 382], [221, 403]], "text": "LOCAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LOCAL", "recog_valid": true, "glyph_recog_text": "LOCAL", "glyph_recog_ld": 1.0}, {"polygon": [[228, 405], [228, 418], [281, 443], [280, 429]], "text": "STORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STORE", "recog_valid": true, "glyph_recog_text": "STORE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449508.jpg", "caption": "a boat with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056292.jpg", "caption": "a baseball player is running to home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187371.jpg", "caption": "a man in a cowboy hat riding a white horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056313.jpg", "caption": "a baseball player walking to home plate", "annotations": [{"polygon": [[212, 152], [242, 156], [238, 185], [224, 182], [207, 180]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318483.jpg", "caption": "a bus driving down a street with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580636.jpg", "caption": "a yellow box car with the pacific fruit express logo on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318495.jpg", "caption": "a hand holding a banana with a sticker on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187442.jpg", "caption": "three men sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580668.jpg", "caption": "a man sitting on a bench with a woman sitting next to him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449610.jpg", "caption": "a bottle of beer sitting on a table next to a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580695.jpg", "caption": "two girls in red and black soccer uniforms playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187488.jpg", "caption": "a tv and a dvd player sitting on a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056432.jpg", "caption": "a man crossing the street in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056435.jpg", "caption": "a woman is using a laptop computer to view a video", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187519.jpg", "caption": "a baseball player in a gray uniform holding a bat", "annotations": [{"polygon": [[115, 133], [114, 163], [180, 167], [188, 163], [187, 134]], "text": "Bank", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bank", "recog_valid": true, "glyph_recog_text": "Bank", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449691.jpg", "caption": "a young boy sitting in the driver's seat of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318637.jpg", "caption": "two men playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187569.jpg", "caption": "a man riding a bike down a street with a sign in the background", "annotations": [{"polygon": [[413, 108], [416, 193], [447, 182], [449, 89]], "text": "IF", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "-m", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449721.jpg", "caption": "a green monster truck with large tires", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580797.jpg", "caption": "a group of people playing soccer on an indoor field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056521.jpg", "caption": "a woman holding a kite with a rainbow on it", "annotations": [{"polygon": [[323, 36], [326, 84], [343, 83], [380, 59], [393, 47], [389, 28], [379, 22], [338, 18]], "text": "Kie", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ke", "recog_valid": false, "glyph_recog_text": "Kie", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187611.jpg", "caption": "a large crowd of people standing in a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449780.jpg", "caption": "a group of people sitting around a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318706.jpg", "caption": "a man sitting on a bench in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449808.jpg", "caption": "a group of people posing for a photo on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318740.jpg", "caption": "a blue and yellow bird eating a piece of bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056608.jpg", "caption": "a man standing next to a large vase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580906.jpg", "caption": "a busy street with cars and buses", "annotations": [{"polygon": [[266, 271], [268, 302], [306, 293], [304, 263]], "text": "GO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GO", "recog_valid": true, "glyph_recog_text": "GO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449845.jpg", "caption": "a cow in a barn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449847.jpg", "caption": "a person riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056634.jpg", "caption": "a green train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056647.jpg", "caption": "a refrigerator with a door open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187728.jpg", "caption": "a large clock on a building with roman numerals", "annotations": [{"polygon": [[123, 192], [126, 185], [141, 168], [152, 172], [156, 181], [143, 201], [135, 206], [131, 204]], "text": "50", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "50", "recog_valid": true, "glyph_recog_text": "50", "glyph_recog_ld": 1.0}, {"polygon": [[117, 303], [131, 326], [151, 330], [153, 318], [136, 293], [125, 295]], "text": "40", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ot", "recog_valid": false, "glyph_recog_text": "40", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[322, 356], [349, 338], [363, 355], [330, 375], [322, 362]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "96", "recog_valid": false, "glyph_recog_text": "25", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[372, 318], [390, 290], [407, 296], [410, 302], [394, 326], [385, 330], [375, 325]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "0%", "recog_valid": false, "glyph_recog_text": "20", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[389, 167], [393, 171], [403, 185], [398, 197], [383, 196], [371, 178]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "10", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[141, 238], [139, 267], [145, 265], [184, 262], [185, 235]], "text": "XI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "X l", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[146, 285], [157, 306], [166, 317], [179, 329], [204, 297], [186, 270]], "text": "VIII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MⅢ", "recog_valid": false, "glyph_recog_text": "VIII", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[191, 336], [213, 303], [240, 317], [234, 353], [222, 352]], "text": "VII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "瓜", "recog_valid": false, "glyph_recog_text": "VII", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[253, 319], [254, 356], [271, 357], [291, 353], [274, 317]], "text": "VI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "K", "recog_valid": false, "glyph_recog_text": "v", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[305, 309], [317, 347], [339, 332]], "text": "V", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "v", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[323, 296], [359, 316], [372, 300], [382, 280], [343, 264], [333, 284]], "text": "IIII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "!!!!", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[344, 233], [386, 230], [388, 246], [387, 261], [343, 260], [345, 247]], "text": "III", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "111", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[242, 146], [243, 184], [261, 179], [283, 182], [285, 143], [262, 142]], "text": "XII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "xI", "recog_valid": false, "glyph_recog_text": "XIl", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[318, 154], [298, 186], [308, 191], [329, 158], [324, 155]], "text": "I", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[326, 202], [338, 223], [375, 202], [364, 186]], "text": "II", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "1!", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[189, 168], [200, 158], [216, 153], [237, 184], [222, 191], [209, 198]], "text": "XI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "天", "recog_valid": false, "glyph_recog_text": "XI", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[152, 208], [191, 225], [200, 207], [162, 190], [154, 198]], "text": "X", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "x", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580960.jpg", "caption": "a computer with a monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318820.jpg", "caption": "a man holding a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187752.jpg", "caption": "a cat is sitting in a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580971.jpg", "caption": "a television screen showing a man in a costume", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580974.jpg", "caption": "a baseball player is swinging a bat on a field", "annotations": [{"polygon": [[125, 186], [165, 176], [171, 203], [132, 213]], "text": "34", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "34", "recog_valid": true, "glyph_recog_text": "34", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000449904.jpg", "caption": "a woman sitting at a table with pizza and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000580979.jpg", "caption": "a blue train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187765.jpg", "caption": "a snowboarder in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187785.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056715.jpg", "caption": "beaumont beds london", "annotations": [{"polygon": [[294, 150], [295, 183], [365, 185], [364, 148]], "text": "Beds", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Beds", "recog_valid": true, "glyph_recog_text": "Beds", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581042.jpg", "caption": "a large clock in front of an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581057.jpg", "caption": "a motorcycle with a number on it parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187844.jpg", "caption": "a united airlines plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581061.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581071.jpg", "caption": "a man in yellow vest is crossing the street", "annotations": [{"polygon": [[168, 120], [168, 120], [214, 130], [211, 153], [156, 143]], "text": "Jewel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jewel", "recog_valid": true, "glyph_recog_text": "Jewel", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581073.jpg", "caption": "a clock on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450003.jpg", "caption": "a woman and a child on skis on a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450006.jpg", "caption": "two men shaking hands on a tennis court", "annotations": [{"polygon": [[35, 494], [156, 494], [156, 442], [44, 456]], "text": "AIK", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "KATY", "recog_valid": false, "glyph_recog_text": "AIK", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318937.jpg", "caption": "two men on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187882.jpg", "caption": "a large building with a large window and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187886.jpg", "caption": "a messy desk with a computer and a lot of stuff on it", "annotations": [{"polygon": [[356, 154], [423, 134], [424, 156], [357, 174]], "text": "SPLENDID", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SPLENDID", "recog_valid": true, "glyph_recog_text": "SPLENDID", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187897.jpg", "caption": "a plate with sushi and chopsticks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450050.jpg", "caption": "a man riding a skateboard down a street next to a bus", "annotations": [{"polygon": [[79, 128], [79, 128], [107, 122], [111, 144], [83, 152]], "text": "160", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "160", "recog_valid": true, "glyph_recog_text": "160", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450052.jpg", "caption": "a group of people walking on a boardwalk", "annotations": [{"polygon": [[170, 207], [171, 235], [191, 232], [213, 230], [239, 229], [269, 227], [292, 229], [308, 231], [321, 232], [320, 210], [303, 205], [288, 204], [274, 203], [256, 202], [229, 203]], "text": "STRAND5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STRAND", "recog_valid": false, "glyph_recog_text": "STRAND5", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056845.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000318995.jpg", "caption": "a baseball player throwing a pitch on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056859.jpg", "caption": "a baseball player is throwing a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187934.jpg", "caption": "a pile of snow in front of a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187939.jpg", "caption": "two laptops sitting on a wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056875.jpg", "caption": "a train station with people walking on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000056892.jpg", "caption": "a baseball player is sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319043.jpg", "caption": "a living room with a couch, chair and table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000187976.jpg", "caption": "a horse and rider jumping over an obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581196.jpg", "caption": "a train on the tracks with a blue and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319061.jpg", "caption": "a hot dog and a can of beer on a wooden bench", "annotations": [{"polygon": [[267, 264], [274, 266], [281, 270], [286, 274], [290, 271], [293, 270], [296, 269], [300, 266], [310, 263], [326, 255], [342, 245], [342, 239], [346, 236], [348, 238], [350, 236], [354, 233], [351, 238], [350, 246], [346, 254], [343, 257], [338, 257], [334, 263], [335, 267], [334, 274], [329, 281], [325, 282], [326, 275], [327, 270], [322, 272], [319, 272], [317, 275], [310, 278], [305, 282], [302, 284], [300, 285], [298, 288], [296, 289], [293, 289], [294, 293], [295, 300], [291, 306], [284, 312], [280, 314], [277, 312], [273, 297], [271, 289], [265, 290], [259, 286], [258, 279], [258, 270], [261, 266], [262, 265]], "text": "Barq's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bang", "recog_valid": false, "glyph_recog_text": "Barq's", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319093.jpg", "caption": "a man holding a kite in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319098.jpg", "caption": "a store filled with fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319105.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319120.jpg", "caption": "a fire hydrant on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188068.jpg", "caption": "a black motorcycle parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581293.jpg", "caption": "a truck with a large fan on the back of it", "annotations": [{"polygon": [[473, 305], [471, 319], [510, 302], [511, 288]], "text": "12,000", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "12.000", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[463, 270], [460, 303], [469, 301], [490, 286], [511, 271], [511, 240]], "text": "FLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "FLA", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581297.jpg", "caption": "a young boy kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319155.jpg", "caption": "a fruit stand with many different types of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188087.jpg", "caption": "an old black and white photo of a boat in front of a lighthouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319188.jpg", "caption": "two birds flying over a lighthouse at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188139.jpg", "caption": "a street sign with a chinese character on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188140.jpg", "caption": "a woman standing next to a truck with a trailer attached", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188165.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319266.jpg", "caption": "a red train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581419.jpg", "caption": "a person's feet standing next to four clocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581425.jpg", "caption": "a cake with a picture of a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450362.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[393, 70], [392, 96], [508, 100], [507, 72]], "text": "LAWSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LAWSON", "recog_valid": true, "glyph_recog_text": "LAWSON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188225.jpg", "caption": "a small plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581446.jpg", "caption": "two women shaking hands on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057157.jpg", "caption": "a man on a skateboard is looking at his phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057166.jpg", "caption": "a bunch of carrots in a plastic container", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319315.jpg", "caption": "a keyboard and mouse sitting on top of a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188252.jpg", "caption": "a toilet with a remote control and a trash can", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319332.jpg", "caption": "a man working in a factory with a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319354.jpg", "caption": "a car parked on the side of the road with a purple umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450434.jpg", "caption": "a pink and white shop with a sign that says donuts", "annotations": [{"polygon": [[330, 340], [331, 360], [340, 364], [354, 370], [367, 372], [381, 374], [399, 373], [418, 368], [418, 346], [412, 350], [398, 352], [387, 352], [377, 352], [366, 350], [353, 349], [342, 345]], "text": "DUNKIN ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DUNNI", "recog_valid": false, "glyph_recog_text": "DUNKIN", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[333, 364], [334, 383], [342, 389], [352, 392], [360, 394], [366, 394], [384, 397], [396, 397], [408, 394], [417, 390], [418, 370], [410, 374], [397, 375], [387, 376], [373, 376], [361, 374], [352, 372], [344, 369]], "text": "DONUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DONLT", "recog_valid": false, "glyph_recog_text": "DONUT", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188310.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319388.jpg", "caption": "a young boy playing with a toy skateboard", "annotations": [{"polygon": [[272, 434], [274, 446], [290, 447], [285, 456], [301, 461], [323, 455], [332, 443], [334, 428], [294, 429], [276, 432]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "266", "recog_valid": false, "glyph_recog_text": "Pnctcograph)", "glyph_recog_ld": 8.333326388942908e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057249.jpg", "caption": "a mirror on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319401.jpg", "caption": "a display case filled with cakes and desserts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057264.jpg", "caption": "a toy hula girl on the dashboard of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581553.jpg", "caption": "a street sign with a traffic sign and a don't honk sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319440.jpg", "caption": "a man in a red shirt holding a blue surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057298.jpg", "caption": "two giraffes drinking water from a pond", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581594.jpg", "caption": "a bear walking through a field with a log", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057308.jpg", "caption": "a parking meter with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450526.jpg", "caption": "a small food truck parked on the side of the road", "annotations": [{"polygon": [[92, 308], [91, 314], [140, 330], [189, 340], [193, 328], [125, 314], [112, 309]], "text": "www.bakkerij-otten.nl", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "www.bakkerij-otten.n", "recog_valid": false, "glyph_recog_text": "www.bakkenj-otten.n", "glyph_recog_ld": 0.900000049999975}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319494.jpg", "caption": "several boats are in the water near a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057361.jpg", "caption": "a busy street with many people walking and driving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057375.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[149, 426], [325, 329], [338, 337], [181, 427]], "text": "SIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIC", "recog_valid": true, "glyph_recog_text": "stc", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581708.jpg", "caption": "a traffic light is shown in front of a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581719.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[330, 117], [334, 115], [340, 114], [346, 102], [344, 101], [342, 99], [342, 99], [343, 96], [346, 95], [349, 96], [350, 93], [348, 92], [345, 92], [344, 92], [345, 89], [348, 87], [353, 87], [358, 90], [359, 96], [365, 98], [366, 100], [370, 100], [374, 103], [373, 105], [373, 106], [374, 109], [373, 111], [370, 112], [366, 110], [363, 110], [365, 121], [368, 121], [370, 118], [372, 118], [372, 120], [370, 123], [366, 128], [363, 127], [360, 124], [359, 116], [356, 113], [348, 111], [346, 116], [349, 118], [351, 117], [354, 118], [355, 119], [347, 122], [342, 120], [338, 120], [333, 120]], "text": "A's", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "衣", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581738.jpg", "caption": "two red buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581744.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188529.jpg", "caption": "a mascot dressed as a bunny skiing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057461.jpg", "caption": "a woman playing tennis on a court", "annotations": [{"polygon": [[97, 99], [182, 91], [180, 68], [95, 76]], "text": "usopen org", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "usopen org", "recog_valid": true, "glyph_recog_text": "usopen org", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319612.jpg", "caption": "a close up of a white flower with yellow centers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450684.jpg", "caption": "a baseball player standing on a field", "annotations": [{"polygon": [[392, 271], [394, 252], [399, 245], [436, 262], [428, 272], [429, 286]], "text": "SOX", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SOK", "recog_valid": false, "glyph_recog_text": "Sox", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450687.jpg", "caption": "a woman carrying a suitcase in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057481.jpg", "caption": "a woman sitting at a desk with a laptop and a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450707.jpg", "caption": "two men sitting on a bench reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188575.jpg", "caption": "a red and white motorcycle parked in a garage", "annotations": [{"polygon": [[176, 234], [172, 252], [199, 267], [205, 249]], "text": "HARLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARLEY", "recog_valid": true, "glyph_recog_text": "HO费", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[208, 249], [202, 268], [244, 292], [249, 273]], "text": "DAVIDSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DAWIDSON", "recog_valid": false, "glyph_recog_text": "DAVIOSCN", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[199, 300], [245, 328], [235, 400], [195, 370]], "text": "99", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "00", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581797.jpg", "caption": "a large airplane parked on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188587.jpg", "caption": "pink biplane parked on grass with other planes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450763.jpg", "caption": "a large jetliner flying in the sky", "annotations": [{"polygon": [[160, 228], [162, 238], [179, 243], [194, 248], [205, 252], [214, 255], [226, 260], [222, 249]], "text": "Lutthansa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Luttnansa", "recog_valid": false, "glyph_recog_text": "iviee/te", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450776.jpg", "caption": "a man in a colorful boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057579.jpg", "caption": "a group of people playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450800.jpg", "caption": "a baseball player is throwing a ball to the batter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057586.jpg", "caption": "a white stove with two burners and a microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057595.jpg", "caption": "a woman holding a tennis racket and a ball", "annotations": [{"polygon": [[315, 417], [387, 417], [387, 450], [321, 450]], "text": "Benz", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Benz", "recog_valid": true, "glyph_recog_text": "Benz", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319749.jpg", "caption": "a pedestrian priority crossing sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057614.jpg", "caption": "a bride and groom cutting into their wedding cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000581904.jpg", "caption": "a large building with a clock on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450833.jpg", "caption": "a group of people riding skateboards on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188693.jpg", "caption": "a woman with luggage at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188712.jpg", "caption": "a living room with a television and a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057641.jpg", "caption": "a street with many people walking down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188725.jpg", "caption": "two toothbrushes are in a holder on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319798.jpg", "caption": "two women carrying bananas on their heads", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057663.jpg", "caption": "a man sitting on a chair holding a sign that says us out of afghanistan", "annotations": [{"polygon": [[159, 208], [159, 208], [260, 207], [257, 245], [151, 242]], "text": "AFGHANISTAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AFGHANISTAN", "recog_valid": true, "glyph_recog_text": "AFGHANISTAN", "glyph_recog_ld": 1.0}, {"polygon": [[202, 175], [240, 174], [232, 205], [200, 204], [196, 189]], "text": "OUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUT", "recog_valid": true, "glyph_recog_text": "OUT", "glyph_recog_ld": 1.0}, {"polygon": [[391, 221], [391, 221], [386, 257], [419, 262], [422, 215], [412, 213]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[400, 173], [402, 180], [410, 209], [466, 216], [466, 216], [471, 182]], "text": "NDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "NDS", "recog_valid": true, "glyph_recog_text": "NDS", "glyph_recog_ld": 1.0}, {"polygon": [[339, 85], [335, 122], [335, 122], [416, 120], [440, 85]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SAC", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319818.jpg", "caption": "a mascot of a beaver is standing in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188749.jpg", "caption": "a small plane with propellers on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188771.jpg", "caption": "a green train on tracks near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450918.jpg", "caption": "a red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000450965.jpg", "caption": "a clock on a wall", "annotations": [{"polygon": [[155, 90], [155, 90], [361, 69], [361, 124], [157, 138]], "text": "UCCIANI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UCCIANI", "recog_valid": true, "glyph_recog_text": "UCCIANI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188826.jpg", "caption": "a laptop computer with headphones and mouse on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188832.jpg", "caption": "joshua tree national park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319917.jpg", "caption": "a bag filled with various items including a laptop, cell phone, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451003.jpg", "caption": "a woman eating a sandwich and drinking a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057794.jpg", "caption": "three women and a child pose for a photo next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451012.jpg", "caption": "a man holding a surfboard on his head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000319943.jpg", "caption": "a surfer riding a wave in the ocean", "annotations": [{"polygon": [[499, 199], [498, 225], [391, 223], [393, 194]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Hotograply", "recog_valid": false, "glyph_recog_text": "photography", "glyph_recog_ld": 0.7272729752063862}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451018.jpg", "caption": "a person laying in bed with a book on top of them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188875.jpg", "caption": "a person in the air on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188889.jpg", "caption": "a row of bicycles parked next to a sign that says think elephant", "annotations": [{"polygon": [[107, 159], [106, 194], [227, 199], [224, 163]], "text": "Think", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Think", "recog_valid": true, "glyph_recog_text": "Think", "glyph_recog_ld": 1.0}, {"polygon": [[237, 167], [237, 199], [286, 200], [286, 208], [292, 209], [291, 199], [367, 201], [367, 174]], "text": "Elephant", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Elephant", "recog_valid": true, "glyph_recog_text": "Elephant", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057828.jpg", "caption": "two parking meters are on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188902.jpg", "caption": "1949 ford f100 pickup - image 1 of 6", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057842.jpg", "caption": "a laptop on a treadmill with a mouse and keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188927.jpg", "caption": "a group of people watching a plane fly by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451099.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188955.jpg", "caption": "a ski lift in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000188972.jpg", "caption": "a train at a station with people standing on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451121.jpg", "caption": "a woman eating a donut in front of a menu", "annotations": [{"polygon": [[351, 209], [352, 239], [430, 230], [432, 202]], "text": "special", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Special", "recog_valid": false, "glyph_recog_text": "special", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057926.jpg", "caption": "a kitchen with a stove, sink, and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451166.jpg", "caption": "a man in white playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189026.jpg", "caption": "a man holding a pizza in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189027.jpg", "caption": "a woman on skis with a dog in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320115.jpg", "caption": "photo of mets' jim thome's retirement ceremony", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057977.jpg", "caption": "a large room with many signs hanging from the ceiling", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000057979.jpg", "caption": "a parking meter and a plant on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451219.jpg", "caption": "a fire hydrant painted has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189094.jpg", "caption": "a man holding up a framed photo of a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451248.jpg", "caption": "two people standing in the snow with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451279.jpg", "caption": "a desk with a laptop, a computer, and a notebook", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451283.jpg", "caption": "a woman standing behind a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320222.jpg", "caption": "a metal rack with two donuts", "annotations": [{"polygon": [[235, 322], [238, 354], [352, 351], [347, 320]], "text": "MOVIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MOVIE", "recog_valid": true, "glyph_recog_text": "MOVIE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451300.jpg", "caption": "a bus with the mozilla logo on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451312.jpg", "caption": "two trains parked in a garage at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320245.jpg", "caption": "a store front with a clock in the window", "annotations": [{"polygon": [[1, 121], [2, 149], [79, 160], [75, 128]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[84, 128], [86, 158], [171, 167], [172, 138]], "text": "GATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GATE", "recog_valid": true, "glyph_recog_text": "GATE", "glyph_recog_ld": 1.0}, {"polygon": [[176, 141], [177, 167], [278, 177], [273, 152]], "text": "CLOCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOCK", "recog_valid": true, "glyph_recog_text": "CLOCK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320273.jpg", "caption": "a green truck with a white cover on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320275.jpg", "caption": "a flock of birds flying around a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058134.jpg", "caption": "a laptop sitting on a table in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058143.jpg", "caption": "a man standing next to a bike rack on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058153.jpg", "caption": "a table with breakfast food and coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320308.jpg", "caption": "three people with backpacks and dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451392.jpg", "caption": "a black and red train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451400.jpg", "caption": "a stuffed bear with a daisy in its mouth", "annotations": [{"polygon": [[341, 207], [366, 216], [358, 230], [346, 240], [335, 243], [328, 241], [328, 235], [331, 230], [337, 227], [336, 220], [337, 213]], "text": "BABIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "入", "recog_valid": false, "glyph_recog_text": "CIRYE", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320353.jpg", "caption": "a white motorcycle parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320362.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[159, 189], [408, 185], [413, 282], [159, 285]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[189, 291], [387, 287], [388, 328], [190, 330]], "text": "HAMMERTIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAMMERTIE!", "recog_valid": false, "glyph_recog_text": "HAMMERTIME", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189295.jpg", "caption": "a traffic light and a building with a large window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058241.jpg", "caption": "a clock hanging from a shelf in a library", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320390.jpg", "caption": "a woman is standing in front of a pile of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320429.jpg", "caption": "a group of people on a snowboard hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189361.jpg", "caption": "a bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058316.jpg", "caption": "a train engine sitting on the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451550.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320503.jpg", "caption": "a painting of flowers in vases on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451580.jpg", "caption": "a man playing basketball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451594.jpg", "caption": "a double decker bus driving down a street", "annotations": [{"polygon": [[349, 363], [368, 353], [512, 378], [511, 392]], "text": "SES", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "6351", "recog_valid": false, "glyph_recog_text": "S E 5", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451597.jpg", "caption": "a bowl of fruit and yogurt with a spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189461.jpg", "caption": "a teddy bear and a cup of coffee next to a donut", "annotations": [{"polygon": [[251, 240], [251, 265], [274, 265], [292, 264], [308, 260], [310, 259], [310, 235], [305, 239], [290, 240], [278, 240], [267, 240]], "text": "DONUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BEN645", "recog_valid": false, "glyph_recog_text": "DONUTS", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058397.jpg", "caption": "a large building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058413.jpg", "caption": "a united airlines airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189491.jpg", "caption": "a woman in blue tennis dress swinging a tennis racket", "annotations": [{"polygon": [[91, 223], [91, 223], [504, 213], [504, 127], [378, 130], [376, 100], [86, 100]], "text": "J.P.Mor", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "J.PAMOr", "recog_valid": false, "glyph_recog_text": "J.P.Mor", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320566.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320593.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189539.jpg", "caption": "a picture of a bunch of fruits and vegetables", "annotations": [{"polygon": [[121, 442], [134, 481], [344, 478], [339, 443]], "text": "VITAMINS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "VITAMINS", "recog_valid": true, "glyph_recog_text": "VITAMINS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451690.jpg", "caption": "a little girl sitting on the floor playing with a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451703.jpg", "caption": "a young man sitting on the steps holding a baseball glove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451707.jpg", "caption": "a man serving food to a little girl at a buffet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189568.jpg", "caption": "a traffic light on a street corner", "annotations": [{"polygon": [[223, 218], [268, 206], [273, 226], [222, 239]], "text": "STANOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIANG", "recog_valid": false, "glyph_recog_text": "STANOO", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189614.jpg", "caption": "a woman in red pants is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451768.jpg", "caption": "a large jet airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058563.jpg", "caption": "a small airplane parked on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058565.jpg", "caption": "a large jetliner flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451793.jpg", "caption": "a woman riding a bike next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451800.jpg", "caption": "two slices of pizza on a paper plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451803.jpg", "caption": "a mouse and keyboard on a desk next to a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189666.jpg", "caption": "a man is making donuts in a machine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058595.jpg", "caption": "a person holding a cell phone", "annotations": [{"polygon": [[60, 71], [59, 96], [125, 96], [129, 102], [181, 101], [179, 71]], "text": "Observer", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "O6senen", "recog_valid": false, "glyph_recog_text": "Observer", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[9, 71], [7, 103], [56, 101], [56, 72]], "text": "Goa", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "God", "recog_valid": false, "glyph_recog_text": "Goa", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058597.jpg", "caption": "a clock tower in a city with people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058601.jpg", "caption": "a man playing tennis on a court", "annotations": [{"polygon": [[0, 156], [97, 154], [98, 219], [0, 223]], "text": "hen", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "hen", "recog_valid": true, "glyph_recog_text": "hen", "glyph_recog_ld": 1.0}, {"polygon": [[176, 181], [351, 174], [351, 198], [175, 206]], "text": "Ostachsische", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Qstsachsische", "recog_valid": false, "glyph_recog_text": "Ostachsische", "glyph_recog_ld": 0.8461539644969504}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058609.jpg", "caption": "a wooden tray with wine glasses, cheese, and fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451826.jpg", "caption": "a laptop on a desk with a guitar and headphones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189701.jpg", "caption": "a man in a yellow shirt is playing tennis", "annotations": [{"polygon": [[111, 19], [217, 19], [216, 65], [109, 66]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IBM", "recog_valid": true, "glyph_recog_text": "IBM", "glyph_recog_ld": 1.0}, {"polygon": [[122, 80], [195, 80], [195, 113], [123, 113]], "text": "204", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "204", "recog_valid": true, "glyph_recog_text": "204", "glyph_recog_ld": 1.0}, {"polygon": [[480, 106], [480, 106], [388, 96], [381, 119], [481, 137]], "text": "GARNI", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "GARNIG", "recog_valid": false, "glyph_recog_text": "GARNI", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058633.jpg", "caption": "a group of people standing next to a stop sign", "annotations": [{"polygon": [[170, 66], [171, 107], [173, 113], [178, 118], [184, 120], [280, 121], [280, 99], [299, 98], [300, 79], [295, 68]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320803.jpg", "caption": "a motorcycle is parked in a garage under a tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320816.jpg", "caption": "a woman in yellow shirt and black shorts is playing tennis", "annotations": [{"polygon": [[21, 139], [119, 143], [117, 171], [21, 164]], "text": "USOPEN.ORG", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "usopen.org", "recog_valid": false, "glyph_recog_text": "USOPEN.ORG", "glyph_recog_ld": 0.1000008999991}, {"polygon": [[401, 195], [500, 202], [513, 225], [514, 274], [398, 264]], "text": "CH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CH", "recog_valid": true, "glyph_recog_text": "CH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451893.jpg", "caption": "a group of boats parked on the shore of a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058677.jpg", "caption": "a hot dog and a bottle of curry sauce", "annotations": [{"polygon": [[206, 170], [207, 177], [234, 186], [263, 187], [274, 185], [266, 204], [260, 211], [255, 211], [246, 206], [232, 205], [214, 201], [196, 193], [190, 186], [189, 177], [190, 169], [193, 165]], "text": "Curry", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Curry", "recog_valid": true, "glyph_recog_text": "Curry", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000451902.jpg", "caption": "two men cutting into a cake with a flag on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320858.jpg", "caption": "a woman standing by a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189805.jpg", "caption": "a large red semi truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058735.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058736.jpg", "caption": "a purple blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320892.jpg", "caption": "a crowd of people standing around a large airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058766.jpg", "caption": "a blue and white train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320911.jpg", "caption": "a group of people walking towards a small plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320926.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000320929.jpg", "caption": "two children playing with blocks on a rug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452004.jpg", "caption": "a woman sitting on a couch with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058796.jpg", "caption": "a collage of pictures of food and drinks", "annotations": [{"polygon": [[232, 111], [222, 126], [235, 130], [248, 134], [270, 145], [281, 144], [283, 130], [270, 123]], "text": "DAILY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dailu", "recog_valid": false, "glyph_recog_text": "DAILY", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058815.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058834.jpg", "caption": "a formal dining room with a chandelier and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452050.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058864.jpg", "caption": "a laptop and a beer on a wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058867.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452087.jpg", "caption": "a train is traveling down the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321048.jpg", "caption": "a group of people standing around a small portable toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000189996.jpg", "caption": "a horse and carriage on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058926.jpg", "caption": "a man riding a bicycle with a bag on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058929.jpg", "caption": "a young girl standing on a beach with a kite", "annotations": [{"polygon": [[268, 373], [274, 365], [288, 358], [295, 355], [306, 350], [312, 370], [299, 373], [287, 378], [283, 383], [275, 397]], "text": "Boats", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Boate", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[338, 368], [351, 345], [363, 350], [371, 357], [378, 364], [382, 374], [383, 380], [384, 387], [370, 395], [367, 388], [365, 382], [358, 377], [350, 373]], "text": "Ships", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bhiy", "recog_valid": false, "glyph_recog_text": "Ships", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[271, 339], [270, 367], [389, 372], [391, 342]], "text": "VINTAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "VINTAGE", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058930.jpg", "caption": "a green and white bus driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190016.jpg", "caption": "a group of men standing around a truck with a pile of furniture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452162.jpg", "caption": "a motorcycle parked under a tent with many colorful flags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452167.jpg", "caption": "a man riding a red motorcycle on a track", "annotations": [{"polygon": [[290, 278], [290, 278], [329, 255], [325, 248], [298, 263], [283, 266]], "text": "DROCNET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "San", "recog_valid": false, "glyph_recog_text": "OMOCRET", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452168.jpg", "caption": "a parrot sitting on a cart with a sign that says no birds allowed", "annotations": [{"polygon": [[196, 444], [295, 452], [296, 427], [200, 420]], "text": "ALLOWED", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ALLOWED", "recog_valid": true, "glyph_recog_text": "ALLOWED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190026.jpg", "caption": "a motorcycle parked on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000058987.jpg", "caption": "a small car with a surfboard on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321132.jpg", "caption": "a man is riding a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190064.jpg", "caption": "a red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321140.jpg", "caption": "a collage of pictures showing various items including yellow items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190133.jpg", "caption": "a bento box with fruit, vegetables, and a heart shaped pastry", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452279.jpg", "caption": "two women sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059075.jpg", "caption": "a woman standing in front of a stand of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190157.jpg", "caption": "british airways to cut up to 1,000 jobs", "annotations": [{"polygon": [[232, 204], [229, 224], [275, 247], [279, 226]], "text": "BRITISH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRITISH", "recog_valid": true, "glyph_recog_text": "BRITISH", "glyph_recog_ld": 1.0}, {"polygon": [[281, 228], [275, 248], [325, 272], [325, 251]], "text": "AIRWAYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AIRWAYS", "recog_valid": true, "glyph_recog_text": "AIRWAYS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190199.jpg", "caption": "a small plane flying over a field with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059142.jpg", "caption": "a fire hydrant and three newspapers on a brick sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321302.jpg", "caption": "an old black train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190232.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[391, 212], [392, 242], [513, 240], [507, 220], [490, 220], [487, 209], [479, 209], [481, 220], [412, 219], [409, 209], [392, 210]], "text": "anada", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Tanada", "recog_valid": false, "glyph_recog_text": "anada", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190234.jpg", "caption": "a toilet in a small room with a round window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059165.jpg", "caption": "a boy sitting at a table with a bunch of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321310.jpg", "caption": "a group of white vases sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059167.jpg", "caption": "a group of people standing in line to board a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059170.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059218.jpg", "caption": "a stop sign with a palm tree in the background", "annotations": [{"polygon": [[212, 220], [211, 262], [316, 262], [316, 221], [265, 220]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452441.jpg", "caption": "a stack of suitcases sitting on top of each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059229.jpg", "caption": "a woman in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059231.jpg", "caption": "two parking meters with two cups on them", "annotations": [{"polygon": [[0, 91], [134, 88], [136, 117], [-1, 120]], "text": "AEOEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "AEDEL", "recog_valid": false, "glyph_recog_text": "AEOEL", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321389.jpg", "caption": "a group of boys playing soccer on a field", "annotations": [{"polygon": [[0, 266], [-4, 324], [216, 321], [223, 267]], "text": "ME", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ME", "recog_valid": true, "glyph_recog_text": "ME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059252.jpg", "caption": "a group of people are playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321418.jpg", "caption": "a group of people walking with cows in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452495.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[287, 229], [313, 225], [325, 288], [304, 294]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "3 5", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[314, 224], [338, 221], [344, 247], [345, 286], [324, 289]], "text": "85", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UO", "recog_valid": false, "glyph_recog_text": "CO LO", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452498.jpg", "caption": "a television on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452512.jpg", "caption": "a fire hydrant is in front of a building", "annotations": [{"polygon": [[208, 66], [193, 98], [254, 99], [259, 71]], "text": "240", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "240", "recog_valid": true, "glyph_recog_text": "240", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190382.jpg", "caption": "a baseball player is pitching a ball on a field", "annotations": [{"polygon": [[378, 166], [377, 203], [423, 203], [426, 169], [420, 164], [411, 163], [384, 164]], "text": "38", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "38", "recog_valid": true, "glyph_recog_text": "38", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059318.jpg", "caption": "a school bus is parked in the dark at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059321.jpg", "caption": "a street sign with a street light and a building in the background", "annotations": [{"polygon": [[158, 208], [155, 220], [240, 238], [241, 227]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROADWAY", "recog_valid": true, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 1.0}, {"polygon": [[357, 293], [360, 338], [487, 335], [485, 294]], "text": "KARA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "KARA", "recog_valid": true, "glyph_recog_text": "KARA", "glyph_recog_ld": 1.0}, {"polygon": [[326, 365], [324, 408], [489, 409], [489, 369]], "text": "FORAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "FORAN", "recog_valid": true, "glyph_recog_text": "FORAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321466.jpg", "caption": "a person on a snowboard", "annotations": [{"polygon": [[318, 346], [321, 301], [382, 302], [380, 348]], "text": "ride", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ride", "recog_valid": true, "glyph_recog_text": "ride", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321492.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321495.jpg", "caption": "the day today - the irish news", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059367.jpg", "caption": "a group of people sitting on couches in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321517.jpg", "caption": "a stop sign and a church with a stop sign", "annotations": [{"polygon": [[329, 308], [331, 342], [417, 339], [412, 306]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[326, 359], [321, 393], [434, 389], [427, 353]], "text": "ARRET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARRET", "recog_valid": true, "glyph_recog_text": "ARRET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059384.jpg", "caption": "a red car is parked at an electric charging station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059385.jpg", "caption": "three suitcases sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452615.jpg", "caption": "a desk with a computer, a laptop, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452619.jpg", "caption": "two children holding umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452621.jpg", "caption": "a yellow bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321583.jpg", "caption": "a desk with three computer monitors and a laptop", "annotations": [{"polygon": [[388, 221], [389, 233], [416, 218], [418, 188]], "text": "VALLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "2227", "recog_valid": false, "glyph_recog_text": "VALLEY", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190521.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190530.jpg", "caption": "a baseball player holding a bat in front of a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059475.jpg", "caption": "a man is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190554.jpg", "caption": "a woman sitting at a table with a birthday cake", "annotations": [{"polygon": [[122, 262], [129, 296], [219, 278], [214, 247]], "text": "happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tiappy", "recog_valid": false, "glyph_recog_text": "happy", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[133, 298], [143, 344], [231, 325], [222, 279]], "text": "birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "内", "recog_valid": false, "glyph_recog_text": "birthday", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059489.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190579.jpg", "caption": "a herd of sheep standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059526.jpg", "caption": "a pizza in a box", "annotations": [{"polygon": [[388, 237], [439, 225], [447, 253], [396, 266]], "text": "Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "uet", "recog_valid": false, "glyph_recog_text": "Cola", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[21, 87], [33, 118], [118, 98], [105, 67]], "text": "LA ", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "LASCALAS", "recog_valid": false, "glyph_recog_text": "LA", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190603.jpg", "caption": "a street at night with a car parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321679.jpg", "caption": "a black truck driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321709.jpg", "caption": "a man riding a motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059584.jpg", "caption": "cows grazing in the frosty grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452806.jpg", "caption": "a man standing in a living room holding a remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190670.jpg", "caption": "a baseball player standing at home plate with a catcher behind him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452821.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[154, 277], [157, 316], [243, 313], [244, 318], [252, 318], [255, 313], [266, 313], [264, 280]], "text": "Partridge", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Partridge", "recog_valid": true, "glyph_recog_text": "Partridge", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059611.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059618.jpg", "caption": "a steam train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452837.jpg", "caption": "a red plate with pasta and a glass of red wine", "annotations": [{"polygon": [[247, 126], [260, 150], [259, 167], [214, 178], [185, 133], [235, 120]], "text": "ED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ED", "recog_valid": true, "glyph_recog_text": "ED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321766.jpg", "caption": "two giraffes standing in the middle of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190723.jpg", "caption": "a train is parked in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321806.jpg", "caption": "a small airplane parked on a wet runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190754.jpg", "caption": "a bathroom with a toilet and a sign has texts", "annotations": [{"polygon": [[118, 312], [158, 306], [158, 301], [170, 300], [170, 304], [279, 297], [286, 312], [281, 316], [266, 317], [259, 323], [247, 324], [243, 320], [235, 318], [122, 331], [118, 328], [115, 321]], "text": "entourage", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "entourage", "recog_valid": true, "glyph_recog_text": "entourage", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059685.jpg", "caption": "a man sitting on a bed with a laptop and a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321831.jpg", "caption": "a jockey on a horse racing down the track", "annotations": [{"polygon": [[34, 305], [80, 311], [91, 400], [38, 394]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CS", "recog_valid": false, "glyph_recog_text": "o,", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452909.jpg", "caption": "a little girl standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452912.jpg", "caption": "a group of men in baseball uniforms standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452919.jpg", "caption": "a grizzly bear and her cubs are walking on the side of a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000452922.jpg", "caption": "a large clock tower with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321854.jpg", "caption": "a palm tree in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321860.jpg", "caption": "a messy bedroom with a guitar and a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190792.jpg", "caption": "a child in a yellow raincoat looking at apples", "annotations": [{"polygon": [[93, 254], [107, 262], [126, 231], [121, 225], [105, 225]], "text": "U.S.F.L", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "735", "recog_valid": false, "glyph_recog_text": "Tasn", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321897.jpg", "caption": "a clock on the wall", "annotations": [{"polygon": [[161, 67], [157, 113], [378, 162], [362, 119]], "text": "REGUA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "REGUA", "recog_valid": true, "glyph_recog_text": "REGUA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190849.jpg", "caption": "a backpack with various electronic devices laid out on a camouflage blanket", "annotations": [{"polygon": [[82, 234], [92, 260], [133, 242], [126, 214]], "text": "CAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CAT", "recog_valid": true, "glyph_recog_text": "CAT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321937.jpg", "caption": "a person riding a green motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453010.jpg", "caption": "a computer monitor on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059798.jpg", "caption": "a wine glass and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059817.jpg", "caption": "a parking meter with a clock on it", "annotations": [{"polygon": [[359, 232], [359, 232], [375, 217], [398, 236], [376, 251]], "text": "55", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "55", "recog_valid": true, "glyph_recog_text": "55", "glyph_recog_ld": 1.0}, {"polygon": [[332, 208], [352, 194], [374, 212], [352, 229]], "text": "03:", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "03", "recog_valid": false, "glyph_recog_text": "03:", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[137, 389], [131, 396], [181, 435], [187, 429]], "text": "FAS118", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "55215", "recog_valid": false, "glyph_recog_text": "中62114", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000321991.jpg", "caption": "a man in a yellow shirt playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190920.jpg", "caption": "a firefighter wearing a helmet and a fire truck", "annotations": [{"polygon": [[296, 275], [293, 309], [509, 322], [511, 286], [509, 286]], "text": "EUERWEHA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EUERWEHR", "recog_valid": false, "glyph_recog_text": "EUERWEHA", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059868.jpg", "caption": "a kitchen with a refrigerator, stove, and microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190942.jpg", "caption": "two people holding a snowboard in front of a wall", "annotations": [{"polygon": [[176, 88], [176, 88], [176, 120], [176, 124], [356, 117], [353, 85], [175, 86]], "text": "dicipline", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "discipiine", "recog_valid": false, "glyph_recog_text": "dicipline", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453087.jpg", "caption": "a highway with two signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322023.jpg", "caption": "a red fire hydrant sitting in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190952.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322024.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322049.jpg", "caption": "a train on the tracks with a yellow and blue door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453132.jpg", "caption": "a stop sign on a pole in front of a building", "annotations": [{"polygon": [[94, 187], [93, 264], [284, 271], [278, 201]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190991.jpg", "caption": "a snowboarder doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453137.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000190994.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059921.jpg", "caption": "a man taking a picture of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322076.jpg", "caption": "a green train engine on a track near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453152.jpg", "caption": "a horse pulling a buggy on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322082.jpg", "caption": "two women riding horses on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322090.jpg", "caption": "a baseball player holding a bat in his hand", "annotations": [{"polygon": [[217, 255], [265, 232], [316, 228], [353, 239], [356, 231], [368, 233], [378, 264], [380, 305], [364, 298], [333, 283], [300, 276], [261, 280], [234, 292], [222, 305], [217, 319], [209, 320], [203, 305], [200, 284], [202, 261], [207, 254], [215, 253]], "text": "CLEARWATER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEARWATER", "recog_valid": false, "glyph_recog_text": "CLEARWATER", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453167.jpg", "caption": "a woman in a red dress posing next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322097.jpg", "caption": "a street at night with fog and lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322112.jpg", "caption": "a man and a woman with luggage standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322127.jpg", "caption": "a bridge over a river with boats on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000059985.jpg", "caption": "a woman riding a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060005.jpg", "caption": "a green and yellow train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060018.jpg", "caption": "a street sign with a number on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191102.jpg", "caption": "a bar with a clock on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060056.jpg", "caption": "a man holding a frisbee in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060060.jpg", "caption": "a black and white photo of a plane on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453286.jpg", "caption": "a street light with a street sign on it", "annotations": [{"polygon": [[284, 306], [320, 276], [317, 287], [317, 295], [286, 319]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[281, 340], [329, 362], [323, 368], [323, 375], [284, 360]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[163, 385], [155, 390], [152, 396], [150, 404], [154, 413], [161, 416], [167, 416], [182, 425], [188, 422], [191, 408], [188, 397], [185, 394]], "text": "60", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "60", "recog_valid": true, "glyph_recog_text": "60", "glyph_recog_ld": 1.0}, {"polygon": [[149, 473], [148, 506], [213, 456], [201, 437]], "text": "MADISON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MADISOL", "recog_valid": false, "glyph_recog_text": "MADISON", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453287.jpg", "caption": "a street sign on a pole in front of a brick building", "annotations": [{"polygon": [[241, 196], [289, 252], [297, 239], [293, 205], [250, 152]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "3 0", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[181, 349], [186, 308], [233, 292], [229, 335]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}, {"polygon": [[244, 309], [276, 298], [276, 277], [246, 287]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191177.jpg", "caption": "a group of horses on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322249.jpg", "caption": "a stop sign on a pole", "annotations": [{"polygon": [[192, 252], [195, 216], [276, 228], [276, 262]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dOIS", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453332.jpg", "caption": "a bus driving down a street with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191197.jpg", "caption": "a woman and a girl playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453348.jpg", "caption": "a sign on a wall that says nelson street", "annotations": [{"polygon": [[73, 190], [218, 226], [221, 252], [74, 217]], "text": "NELSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NELSON", "recog_valid": true, "glyph_recog_text": "NELSON", "glyph_recog_ld": 1.0}, {"polygon": [[239, 231], [382, 267], [385, 293], [240, 257]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453351.jpg", "caption": "a bathroom with a toilet, sink, and bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453352.jpg", "caption": "a train at a train station at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191230.jpg", "caption": "a red brick building with a traffic light and cars parked in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322315.jpg", "caption": "a black and white photo of a bike in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191251.jpg", "caption": "a small dog wearing a life jacket on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322325.jpg", "caption": "hello kitty microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060182.jpg", "caption": "a man and woman riding on the back of a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191261.jpg", "caption": "a girl laying on her back on a bed", "annotations": [{"polygon": [[290, 105], [368, 117], [365, 145], [286, 132]], "text": "WHATEVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "W4ATe ve r", "recog_valid": false, "glyph_recog_text": "WHATEVER", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453409.jpg", "caption": "a large crowd of people standing around a colorful carnival", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191277.jpg", "caption": "a crowd of people at an outdoor farmers market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453421.jpg", "caption": "a baseball game in progress with a pitcher throwing the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191321.jpg", "caption": "a street corner with a green street sign and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191320.jpg", "caption": "a kfc sign on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191327.jpg", "caption": "a couple of motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191350.jpg", "caption": "a woman riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453512.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322468.jpg", "caption": "a woman in white tennis outfit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453549.jpg", "caption": "a woman in green and black", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453561.jpg", "caption": "a tug boat in the water with a boat behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453566.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[121, 301], [183, 317], [186, 335], [122, 321]], "text": "YEHUDA-", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YEHUDA", "recog_valid": false, "glyph_recog_text": "YEHUDA-", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[307, 328], [351, 309], [351, 322], [308, 339]], "text": "REHOV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REHOV", "recog_valid": true, "glyph_recog_text": "REHOV", "glyph_recog_ld": 1.0}, {"polygon": [[357, 300], [364, 306], [366, 314], [447, 281], [441, 261], [382, 287]], "text": "YAVNEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "YAVNE", "recog_valid": false, "glyph_recog_text": "YAVNEE", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[193, 318], [245, 332], [244, 348], [190, 336]], "text": "-HALEVI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HALEVI", "recog_valid": false, "glyph_recog_text": "-HALEVI", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191439.jpg", "caption": "a man standing in a kitchen with a refrigerator and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453586.jpg", "caption": "a boat is docked at the dock at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191457.jpg", "caption": "a large blue and white airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322530.jpg", "caption": "a red train car with a ladder and a ladder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453611.jpg", "caption": "a large airplane sitting on top of a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060400.jpg", "caption": "a stop sign in front of a forest with trees", "annotations": [{"polygon": [[320, 211], [320, 211], [319, 247], [369, 258], [365, 224]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060425.jpg", "caption": "a group of people cutting into a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453641.jpg", "caption": "a bird sitting on a branch with a twig in its mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060433.jpg", "caption": "a double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060458.jpg", "caption": "a red and yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453676.jpg", "caption": "a cat laying on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453680.jpg", "caption": "a man and a boy sitting on a couch holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453686.jpg", "caption": "a man and a woman standing in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453697.jpg", "caption": "a young boy wearing a helmet and a pink shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191573.jpg", "caption": "an apple keyboard is shown on a black surface", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453734.jpg", "caption": "two men standing on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322670.jpg", "caption": "a street sign with a sign that says don't honk sex st", "annotations": [{"polygon": [[134, 179], [229, 169], [231, 192], [135, 203]], "text": "DON'T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DON'T", "recog_valid": true, "glyph_recog_text": "DON'T", "glyph_recog_ld": 1.0}, {"polygon": [[139, 209], [229, 198], [232, 222], [142, 232]], "text": "HONK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HONK", "recog_valid": true, "glyph_recog_text": "HONK", "glyph_recog_ld": 1.0}, {"polygon": [[291, 156], [374, 151], [378, 205], [294, 210]], "text": "SEX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SEX", "recog_valid": true, "glyph_recog_text": "SEX", "glyph_recog_ld": 1.0}, {"polygon": [[383, 150], [424, 148], [425, 177], [386, 179]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[152, 278], [225, 271], [229, 296], [158, 304]], "text": "$350", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "$350", "recog_valid": true, "glyph_recog_text": "$350", "glyph_recog_ld": 1.0}, {"polygon": [[133, 311], [256, 297], [253, 319], [134, 332]], "text": "PENALTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PENALTY", "recog_valid": true, "glyph_recog_text": "PENALTY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322707.jpg", "caption": "a food cart on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453787.jpg", "caption": "a young child on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453801.jpg", "caption": "several laptops are sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191661.jpg", "caption": "beachmont and montrose street signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322734.jpg", "caption": "a train on the tracks with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322738.jpg", "caption": "a man riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322755.jpg", "caption": "a group of people playing baseball on a field", "annotations": [{"polygon": [[288, 241], [277, 273], [332, 297], [343, 264]], "text": "219", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "239", "recog_valid": false, "glyph_recog_text": "219", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060624.jpg", "caption": "a person holding a remote control in front of a television", "annotations": [{"polygon": [[367, 376], [413, 346], [416, 351], [371, 383]], "text": "SONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SONY", "recog_valid": true, "glyph_recog_text": "3安H5", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191731.jpg", "caption": "a banana tree with a bunch of bananas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000322847.jpg", "caption": "a basket of teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191805.jpg", "caption": "a person holding a book with a dog on the cover", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191828.jpg", "caption": "two men are working on their snowboards in a snow covered area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060774.jpg", "caption": "a man sitting in front of a pile of bird cages", "annotations": [{"polygon": [[318, 368], [320, 359], [323, 355], [327, 350], [333, 348], [339, 346], [336, 337], [325, 340], [317, 346], [311, 355], [309, 363], [309, 368]], "text": "LASKAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASKAP", "recog_valid": false, "glyph_recog_text": "LASHAR", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191847.jpg", "caption": "a blue bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191850.jpg", "caption": "people walking down the street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000453998.jpg", "caption": "a banana in a car seat", "annotations": [{"polygon": [[39, 200], [99, 221], [114, 244], [106, 255], [76, 245], [28, 210]], "text": "evenflo", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "evenfto", "recog_valid": false, "glyph_recog_text": "evenflo", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191858.jpg", "caption": "a woman in a suit holding a red card", "annotations": [{"polygon": [[215, 464], [213, 495], [293, 496], [293, 468]], "text": "foto", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "foto", "recog_valid": true, "glyph_recog_text": "fato", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[312, 463], [313, 497], [418, 497], [419, 473]], "text": "banco", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "banco", "recog_valid": true, "glyph_recog_text": "banco", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191873.jpg", "caption": "a white and red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191879.jpg", "caption": "a man sitting on a bench by the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060820.jpg", "caption": "a white mixer with brown batter in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454068.jpg", "caption": "a red fire hydrant sitting in a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454072.jpg", "caption": "a bottle of beer and some ravioli", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060858.jpg", "caption": "a shirtless man is standing on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454078.jpg", "caption": "a military truck driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454086.jpg", "caption": "a large blue truck parked in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191947.jpg", "caption": "a brown and white calf standing next to a brown cow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454103.jpg", "caption": "a person is using a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191962.jpg", "caption": "a bowl of oranges and apples sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060890.jpg", "caption": "a pelican standing on the grass next to a seagull", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060892.jpg", "caption": "two beds with white sheets and pillows", "annotations": [{"polygon": [[392, 336], [405, 341], [381, 397], [366, 391]], "text": "LEGEND", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LEGEND", "recog_valid": true, "glyph_recog_text": "LEGEND", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191984.jpg", "caption": "a stuffed animal sitting on a couch in a messy room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060915.jpg", "caption": "a man is in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191994.jpg", "caption": "a tray with a sandwich and a bowl of soup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000191995.jpg", "caption": "a man riding a yellow motorcycle on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192009.jpg", "caption": "a stop sign with a sign has texts", "annotations": [{"polygon": [[248, 198], [345, 203], [347, 185], [354, 182], [365, 174], [353, 154], [246, 147], [238, 157], [236, 186], [240, 193]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[271, 244], [270, 272], [330, 274], [334, 246]], "text": "ALL-WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALLSIA", "recog_valid": false, "glyph_recog_text": "ALL-WAY", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454158.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323091.jpg", "caption": "a man in a wetsuit riding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060952.jpg", "caption": "a cat is peeking out of the grille of a car", "annotations": [{"polygon": [[417, 312], [417, 312], [479, 301], [483, 306], [483, 312], [481, 348], [476, 356], [414, 376], [409, 373], [406, 365], [408, 329], [410, 319]], "text": "3676", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "3676", "recog_valid": true, "glyph_recog_text": "3676", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000060953.jpg", "caption": "three planes flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454171.jpg", "caption": "a group of people sitting at a table in front of a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323109.jpg", "caption": "a man eating pizza with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323125.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323127.jpg", "caption": "a blue and white surfboard leaning against a fence", "annotations": [{"polygon": [[28, 300], [43, 343], [11, 352], [-1, 316]], "text": "Roxu", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "888", "recog_valid": false, "glyph_recog_text": "Roxu", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323133.jpg", "caption": "a living room with a couch, a television, and a bookcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454206.jpg", "caption": "delta airlines boeing 757-200 nr 909 at los angeles international airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323147.jpg", "caption": "two men in suits standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454236.jpg", "caption": "a yellow school bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192098.jpg", "caption": "a view of the city from a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454269.jpg", "caption": "a young boy in a red baseball uniform holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323203.jpg", "caption": "a woman holding a cell phone while standing in front of a crowd", "annotations": [{"polygon": [[318, 274], [357, 321], [345, 331], [306, 284]], "text": "KINGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KINGS", "recog_valid": true, "glyph_recog_text": "KINGS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323213.jpg", "caption": "three women in safety vests and vests are standing in front of a door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061076.jpg", "caption": "a truck driving down a snowy road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323252.jpg", "caption": "a red and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454372.jpg", "caption": "a cat drinking from a bottle next to a dog bowl", "annotations": [{"polygon": [[186, 359], [214, 363], [233, 361], [253, 359], [263, 356], [266, 378], [251, 384], [227, 388], [208, 388], [186, 388]], "text": "BEASTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BEASTE", "recog_valid": true, "glyph_recog_text": "BEASTE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061159.jpg", "caption": "a man is working on a car with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454399.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061202.jpg", "caption": "a woman eating a doughnut in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061203.jpg", "caption": "a kitchen with a refrigerator and a plant in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454422.jpg", "caption": "a steam train is blowing smoke on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192290.jpg", "caption": "a woman in a bathroom mirror taking a selfie", "annotations": [{"polygon": [[240, 356], [236, 378], [310, 405], [314, 386]], "text": "OMER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "OMER", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192320.jpg", "caption": "a laptop computer sitting on a counter top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061266.jpg", "caption": "people waiting at a train station with luggage and luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192348.jpg", "caption": "two men sitting down", "annotations": [{"polygon": [[384, 284], [419, 300], [397, 341], [360, 377], [314, 357]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "5", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061293.jpg", "caption": "a group of people skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323448.jpg", "caption": "a young girl holding an umbrella on a rainy day", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323470.jpg", "caption": "a person is pouring water into a toilet", "annotations": [{"polygon": [[481, 418], [503, 449], [513, 449], [512, 435], [490, 409]], "text": "CON", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "CON", "recog_valid": true, "glyph_recog_text": "COr", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323489.jpg", "caption": "a car driving down a street with a truck on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323491.jpg", "caption": "a baseball player is pitching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323519.jpg", "caption": "a motorcycle parked next to a wooden door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323536.jpg", "caption": "a group of people standing around a motorcycle show", "annotations": [{"polygon": [[470, 102], [473, 93], [480, 84], [497, 75], [499, 76], [501, 87], [493, 91], [485, 100], [484, 105], [483, 106], [474, 106]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "HE", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061409.jpg", "caption": "a white and red ice cream truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192494.jpg", "caption": "a dragon boat is tied to a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192501.jpg", "caption": "a plate of chocolate cake and ice cream sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454647.jpg", "caption": "a person is holding a shoe with a horse's hoof", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454659.jpg", "caption": "a man standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061452.jpg", "caption": "a horse pulling a wagon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454680.jpg", "caption": "a traffic light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454681.jpg", "caption": "an old train is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192539.jpg", "caption": "a crowd of people standing around a street with signs", "annotations": [{"polygon": [[460, 446], [495, 439], [506, 469], [465, 468]], "text": "SSA", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "SSA", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[216, 385], [216, 385], [216, 385], [245, 410], [245, 410], [464, 391], [464, 391], [464, 391], [475, 365]], "text": "Exactly", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "E x a c t l y", "glyph_recog_ld": 7.692301774442356e-07}, {"polygon": [[196, 384], [196, 384], [196, 384], [196, 384], [338, 368], [338, 368], [327, 349], [184, 366]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "NOT", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323612.jpg", "caption": "a cat sitting on a table next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454690.jpg", "caption": "a black and white photo of a television sitting on a shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454692.jpg", "caption": "two children sitting in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061478.jpg", "caption": "two women cutting a cake at a restaurant", "annotations": [{"polygon": [[128, 386], [119, 395], [100, 384], [91, 389], [70, 378], [63, 371], [71, 361], [75, 364], [72, 368], [94, 376], [100, 371], [111, 372], [114, 376], [121, 381], [124, 376], [132, 382]], "text": "Happy Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "haopngpoP eu k", "recog_valid": false, "glyph_recog_text": "appy Birtyday", "glyph_recog_ld": 0.14285775510160348}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454712.jpg", "caption": "a man is putting hot dogs in a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061498.jpg", "caption": "people playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061506.jpg", "caption": "black and white photo of a horse grazing in the mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323666.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061526.jpg", "caption": "a stop sign and a street sign on a pole", "annotations": [{"polygon": [[202, 209], [312, 210], [313, 255], [201, 256]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454745.jpg", "caption": "a green motorcycle is parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061535.jpg", "caption": "a group of cows laying down on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323695.jpg", "caption": "a traffic jam on a busy street with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454769.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061559.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454783.jpg", "caption": "graffiti on a train car with a sign has texts", "annotations": [{"polygon": [[198, 305], [192, 316], [197, 325], [202, 328], [216, 326], [262, 307], [262, 285], [245, 279], [220, 283]], "text": "Grfrrr!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cnrt", "recog_valid": false, "glyph_recog_text": "Grfrrr!", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[94, 276], [94, 324], [148, 352], [164, 328], [174, 265]], "text": "pepe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "晓", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454789.jpg", "caption": "a row of red double decker buses parked in a lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323728.jpg", "caption": "two women sitting at a table with laptops", "annotations": [{"polygon": [[350, 170], [351, 163], [356, 152], [363, 143], [371, 138], [388, 136], [409, 140], [409, 143], [406, 150], [396, 147], [380, 148], [371, 150], [363, 160], [360, 174]], "text": "COOKIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SaEAMMS", "recog_valid": false, "glyph_recog_text": "COOKIE", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323733.jpg", "caption": "a television in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323737.jpg", "caption": "a purple bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192669.jpg", "caption": "a blue and white airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454821.jpg", "caption": "a yellow bus on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061642.jpg", "caption": "a laptop computer sitting on a desk next to a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323789.jpg", "caption": "a man in a military uniform sitting at a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192722.jpg", "caption": "a man holding an umbrella in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323813.jpg", "caption": "a dead end sign on a sidewalk", "annotations": [{"polygon": [[0, 295], [157, 300], [162, 359], [1, 358], [3, 359]], "text": "DEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "DEAD", "recog_valid": true, "glyph_recog_text": "DEAD", "glyph_recog_ld": 1.0}, {"polygon": [[17, 375], [144, 375], [143, 436], [18, 435]], "text": "END", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "END", "recog_valid": true, "glyph_recog_text": "END", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192747.jpg", "caption": "a man walking a horse in a dirt arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061683.jpg", "caption": "three umbrellas with writing on them", "annotations": [{"polygon": [[129, 273], [133, 205], [295, 177], [295, 252]], "text": "SCE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SCE", "recog_valid": true, "glyph_recog_text": "SCE", "glyph_recog_ld": 1.0}, {"polygon": [[315, 224], [354, 199], [357, 223], [319, 242]], "text": "JUST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JUST", "recog_valid": true, "glyph_recog_text": "JUST", "glyph_recog_ld": 1.0}, {"polygon": [[414, 92], [389, 163], [403, 169], [431, 94]], "text": " P S U", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "cnT", "recog_valid": false, "glyph_recog_text": "PSU", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[419, 174], [414, 202], [504, 206], [504, 180]], "text": "DANC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DANC", "recog_valid": true, "glyph_recog_text": "DANC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454916.jpg", "caption": "a man riding a skateboard on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192774.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192795.jpg", "caption": "santa teresa costa rica postcard", "annotations": [{"polygon": [[103, 128], [102, 159], [245, 157], [239, 127]], "text": "SANTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SANTA", "recog_valid": true, "glyph_recog_text": "SANTA", "glyph_recog_ld": 1.0}, {"polygon": [[251, 126], [251, 137], [258, 138], [260, 157], [413, 156], [404, 126]], "text": "TERESA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TERESA", "recog_valid": true, "glyph_recog_text": "TERESA", "glyph_recog_ld": 1.0}, {"polygon": [[117, 369], [119, 400], [381, 398], [371, 371]], "text": "COSTARICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "COSTA RICA", "recog_valid": false, "glyph_recog_text": "COSTARICA", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454940.jpg", "caption": "a motorcycle racer is riding on a track", "annotations": [{"polygon": [[207, 229], [204, 255], [216, 250], [230, 244], [237, 244], [242, 221], [219, 225]], "text": "61", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "61", "recog_valid": true, "glyph_recog_text": "61", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323868.jpg", "caption": "a delta airlines airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061729.jpg", "caption": "a street with cars parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061734.jpg", "caption": "a tennis player is about to hit a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192818.jpg", "caption": "a motorcycle parked in front of a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000454963.jpg", "caption": "a rocking horse and clock in black and white", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323896.jpg", "caption": "a motorcycle parked in a parking lot with a car in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192831.jpg", "caption": "three trains are parked on the tracks near each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192835.jpg", "caption": "a man holding a laptop with the words barack obama built you a robot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000323917.jpg", "caption": "a young woman wearing a tank top with the words state greek on it", "annotations": [{"polygon": [[172, 306], [176, 295], [206, 296], [235, 299], [276, 306], [290, 312], [289, 343], [256, 337], [232, 331], [210, 329], [188, 327], [172, 329]], "text": "GREEK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GRZZK", "recog_valid": false, "glyph_recog_text": "GREEK", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[228, 279], [224, 296], [234, 298], [248, 301], [268, 304], [289, 310], [292, 307], [294, 292], [268, 284], [261, 283]], "text": "STATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STATE", "recog_valid": true, "glyph_recog_text": "STATE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455010.jpg", "caption": "a stop sign and street sign on a street corner", "annotations": [{"polygon": [[256, 194], [325, 200], [327, 213], [321, 218], [316, 217], [314, 232], [252, 227], [248, 218], [250, 205], [250, 199]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192867.jpg", "caption": "a boy playing a video game on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061809.jpg", "caption": "a baseball player throwing a pitch on a field", "annotations": [{"polygon": [[101, 233], [112, 223], [128, 212], [132, 217], [130, 228], [118, 243], [109, 246], [105, 242]], "text": "Flying", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Fiying", "recog_valid": false, "glyph_recog_text": "Flying", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455037.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061822.jpg", "caption": "a car driving down a street with a few people walking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061844.jpg", "caption": "a baseball player running to home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192928.jpg", "caption": "a pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324000.jpg", "caption": "a man in a car using a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000192955.jpg", "caption": "a horse and a trolley car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324052.jpg", "caption": "a skateboarder is doing a trick on a rail", "annotations": [{"polygon": [[135, 388], [136, 427], [175, 427], [175, 390]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455132.jpg", "caption": "a train on the tracks with a bridge over it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193011.jpg", "caption": "a brown bear laying down", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061945.jpg", "caption": "a taxi cab driving down a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061946.jpg", "caption": "a stuffed animal on a plate with a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193023.jpg", "caption": "a young boy is standing next to a yellow fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061951.jpg", "caption": "a narrow alley with people on motorcycles and a man on a motorbike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324115.jpg", "caption": "a pizza oven in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193044.jpg", "caption": "a group of elephants standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324124.jpg", "caption": "radio city music hall, new york city, new york, usa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193062.jpg", "caption": "a sandwich on a plate", "annotations": [{"polygon": [[273, 191], [328, 174], [326, 208], [275, 232]], "text": "Dr Pepper", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pooy", "recog_valid": false, "glyph_recog_text": "Or Peppe", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000061992.jpg", "caption": "a bus driving down a street with tall buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193094.jpg", "caption": "a stop sign and a sign has texts", "annotations": [{"polygon": [[142, 222], [142, 233], [295, 209], [295, 196]], "text": "CONVERTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONVENTIOT", "recog_valid": false, "glyph_recog_text": "CONVERTION", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193095.jpg", "caption": "a bathroom with a glass wall and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062031.jpg", "caption": "a skier is flying through the air while others watch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062038.jpg", "caption": "a table with wine glasses and a plate of food", "annotations": [{"polygon": [[235, 369], [215, 380], [219, 394], [231, 395], [253, 383], [271, 361], [269, 358]], "text": "Migcation", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aigualien", "recog_valid": false, "glyph_recog_text": "Migcatior", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062053.jpg", "caption": "a laptop computer sitting on a desk with a guitar and other electronics", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324200.jpg", "caption": "a long sandwich on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193148.jpg", "caption": "a wii with a controller, a game and a dvd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193166.jpg", "caption": "a red car parked next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455311.jpg", "caption": "a wooden fence with many old cell phones on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324246.jpg", "caption": "a parking meter on the beach near the ocean", "annotations": [{"polygon": [[251, 111], [255, 165], [265, 164], [263, 144], [284, 142], [291, 137], [294, 126], [293, 117], [285, 110], [279, 109]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455333.jpg", "caption": "a double decker bus parked in a garage", "annotations": [{"polygon": [[118, 189], [119, 208], [150, 201], [151, 178]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BUS", "recog_valid": true, "glyph_recog_text": "BUS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455340.jpg", "caption": "two women getting on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062129.jpg", "caption": "a man standing next to a cow in a barn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062132.jpg", "caption": "the bears were left in the back of a lorry in the village of yorkshire, where the british army was stationed during the second world war", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324280.jpg", "caption": "a woman walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193213.jpg", "caption": "a baseball player is swinging at a ball", "annotations": [{"polygon": [[349, 143], [365, 174], [385, 167], [371, 138]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "乌", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324286.jpg", "caption": "a group of surfboards on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455366.jpg", "caption": "an older woman is sitting at a table with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455369.jpg", "caption": "a group of people standing around a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062160.jpg", "caption": "a white and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193243.jpg", "caption": "a street sign on a city street with cars driving by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324316.jpg", "caption": "two men standing on a street corner", "annotations": [{"polygon": [[435, 383], [445, 380], [511, 377], [512, 418], [494, 420]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "ST", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193254.jpg", "caption": "a black cat eating a piece of food on a plate", "annotations": [{"polygon": [[466, 268], [466, 282], [466, 282], [413, 266], [413, 266], [418, 251], [418, 251]], "text": "DANIELLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PINIELLE", "recog_valid": false, "glyph_recog_text": "DAMIELLE", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455406.jpg", "caption": "a dog standing on its hind legs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455416.jpg", "caption": "a table with two plates of food and a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062204.jpg", "caption": "a large red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193293.jpg", "caption": "a traffic light with a sign that says no parking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324366.jpg", "caption": "a tennis player on a court with a net", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062226.jpg", "caption": "a man in a blue shirt is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062233.jpg", "caption": "four people standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193327.jpg", "caption": "a baseball game in progress with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062261.jpg", "caption": "a stuffed animal sitting on a chair with a wii remote", "annotations": [{"polygon": [[274, 217], [375, 206], [379, 229], [279, 241]], "text": "NYONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANYONE", "recog_valid": false, "glyph_recog_text": "NYONE", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[284, 274], [397, 255], [403, 282], [292, 302]], "text": "TENNIS ?", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TENNIS?", "recog_valid": false, "glyph_recog_text": "TENNIS ?", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324427.jpg", "caption": "a fruit stand with many different types of fruit", "annotations": [{"polygon": [[41, 53], [36, 65], [136, 84], [141, 70]], "text": "GRACIAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "GRACIAS", "recog_valid": true, "glyph_recog_text": "GRACIAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324428.jpg", "caption": "boats in the harbor of algiers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193359.jpg", "caption": "a man standing on a street corner talking on his cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455505.jpg", "caption": "a sign on a pole with a parking meter in the background", "annotations": [{"polygon": [[204, 339], [294, 337], [292, 371], [202, 373]], "text": "NE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NE", "recog_valid": true, "glyph_recog_text": "N E", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[261, 110], [261, 182], [276, 182], [277, 154], [294, 153], [306, 142], [308, 124], [300, 113], [289, 109]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193385.jpg", "caption": "a person skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062324.jpg", "caption": "a group of people cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193398.jpg", "caption": "a white cabinet with a clock and a plate on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455545.jpg", "caption": "people walking through an airport with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062337.jpg", "caption": "a horse eating grass in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324488.jpg", "caption": "a meal on an airplane with pasta, salad and bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062387.jpg", "caption": "a busy street with cars, motorcycles, and pedestrians", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062390.jpg", "caption": "a pizza with pepperoni and cheese on a table", "annotations": [{"polygon": [[82, 154], [84, 152], [92, 151], [102, 144], [107, 130], [113, 131], [111, 140], [107, 148], [100, 155], [93, 159], [85, 161], [83, 160], [82, 157]], "text": "QUALITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OUALUT", "recog_valid": false, "glyph_recog_text": "QUALITY", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193465.jpg", "caption": "a laptop computer with headphones and a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193479.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455639.jpg", "caption": "a boy swinging a baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455649.jpg", "caption": "a sandwich and chips on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324591.jpg", "caption": "three women in blue jackets standing next to sheep", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062455.jpg", "caption": "a woman with red hair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455677.jpg", "caption": "a woman and a girl in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193540.jpg", "caption": "a group of people playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062472.jpg", "caption": "a tennis player is about to hit the ball", "annotations": [{"polygon": [[177, 216], [175, 261], [434, 257], [434, 215]], "text": "BACLAYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAOCLAYS", "recog_valid": false, "glyph_recog_text": "BACLAYS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[-3, 149], [-1, 197], [120, 197], [123, 148]], "text": "VS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "v s", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[151, 193], [151, 150], [360, 146], [360, 197]], "text": "ATP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ATP", "recog_valid": true, "glyph_recog_text": "ATP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324635.jpg", "caption": "four small pizzas on a wooden board with a menu", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324643.jpg", "caption": "a man is walking in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324650.jpg", "caption": "a man carrying a surfboard on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193578.jpg", "caption": "a cat laying on top of a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062541.jpg", "caption": "a man on a skateboard doing tricks on a ramp", "annotations": [{"polygon": [[435, 186], [434, 233], [230, 226], [231, 190]], "text": "MONKEYDRIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MONKEYDRIVE", "recog_valid": true, "glyph_recog_text": "MONKEYDRIVE", "glyph_recog_ld": 1.0}, {"polygon": [[231, 229], [435, 237], [434, 264], [232, 247]], "text": "TEXTILDRUCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TEXTILDRUCK", "recog_valid": true, "glyph_recog_text": "TEXTILDRUCK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324688.jpg", "caption": "a man walks through the snow on a street in finland", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193621.jpg", "caption": "a bike is parked next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455776.jpg", "caption": "a young boy in a green shirt and white pants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324706.jpg", "caption": "a worker is loading a plane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324709.jpg", "caption": "people standing in line to get food from a food truck", "annotations": [{"polygon": [[26, 251], [26, 280], [68, 281], [68, 251]], "text": "Red", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ru", "recog_valid": false, "glyph_recog_text": "Red", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[69, 255], [126, 252], [127, 279], [69, 283]], "text": "Rooster", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Hae", "recog_valid": false, "glyph_recog_text": "Rooster", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193654.jpg", "caption": "a man in a suit and tie giving a speech to a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324732.jpg", "caption": "a woman holding a sign in front of a refrigerator", "annotations": [{"polygon": [[382, 86], [387, 86], [392, 87], [395, 89], [449, 113], [450, 117], [449, 133], [446, 134], [440, 134], [384, 114], [380, 110], [379, 106], [379, 101], [381, 99], [381, 99], [380, 97], [380, 93], [380, 89], [381, 87]], "text": "SAMSUNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "SAMSUNG", "recog_valid": true, "glyph_recog_text": "SAMSUING", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455808.jpg", "caption": "a stop sign is shown in front of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062604.jpg", "caption": "a church with pews and stained glass windows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324754.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062613.jpg", "caption": "a man in red shirt and black shorts is about to hit a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193692.jpg", "caption": "a person sitting on a bench looking out to the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324772.jpg", "caption": "a man and his dog on a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455860.jpg", "caption": "taxi tai lao ca", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062652.jpg", "caption": "a cat standing on its hind legs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193732.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[129, 376], [314, 301], [308, 257], [128, 334]], "text": "BALTIMORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BALTIMORE", "recog_valid": true, "glyph_recog_text": "BALTIMORE", "glyph_recog_ld": 1.0}, {"polygon": [[344, 254], [378, 240], [373, 223], [341, 237]], "text": "100", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "100", "recog_valid": true, "glyph_recog_text": "100", "glyph_recog_ld": 1.0}, {"polygon": [[351, 262], [384, 247], [388, 272], [350, 286]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}, {"polygon": [[113, 114], [121, 150], [228, 208], [225, 179]], "text": "VIETNAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VIETNAM", "recog_valid": true, "glyph_recog_text": "VIETNAM", "glyph_recog_ld": 1.0}, {"polygon": [[235, 187], [244, 219], [310, 254], [319, 232]], "text": "VETER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VETER", "recog_valid": true, "glyph_recog_text": "VETER", "glyph_recog_ld": 1.0}, {"polygon": [[135, 160], [136, 191], [263, 256], [263, 229]], "text": "MEMORIAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MEMORIAL", "recog_valid": true, "glyph_recog_text": "MEMORIAL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455877.jpg", "caption": "a bathroom sink with a toothbrush and a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455882.jpg", "caption": "a sign that says masendegade", "annotations": [{"polygon": [[140, 281], [328, 286], [409, 270], [408, 219], [142, 233]], "text": "Masnedogade", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Masnedegade", "recog_valid": false, "glyph_recog_text": "Masnedogade", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455893.jpg", "caption": "a woman standing behind a pile of bananas", "annotations": [{"polygon": [[11, 375], [12, 393], [87, 378], [85, 362]], "text": "BIERGY!", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EIERGY", "recog_valid": false, "glyph_recog_text": "BIERGY!", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324831.jpg", "caption": "a group of airplanes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324852.jpg", "caption": "a bus parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193785.jpg", "caption": "a street light and a building with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324877.jpg", "caption": "a large boat is docked in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000455952.jpg", "caption": "a stop sign and a red and white sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062745.jpg", "caption": "a cake with a cell phone on it", "annotations": [{"polygon": [[62, 340], [83, 341], [83, 352], [125, 357], [130, 366], [119, 374], [76, 369], [59, 376], [52, 368], [61, 361], [58, 351]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "youe", "recog_valid": false, "glyph_recog_text": "you", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[143, 359], [152, 352], [170, 353], [191, 355], [202, 354], [207, 346], [212, 343], [219, 351], [214, 355], [241, 355], [251, 350], [257, 345], [263, 341], [270, 349], [282, 357], [299, 359], [303, 370], [314, 363], [319, 379], [302, 383], [273, 379], [163, 372], [146, 388], [135, 383], [133, 373], [140, 371]], "text": "graduation", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "graduation", "recog_valid": true, "glyph_recog_text": "graduation", "glyph_recog_ld": 1.0}, {"polygon": [[75, 311], [65, 336], [115, 355], [129, 355], [197, 338], [197, 315]], "text": "Congrats", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cargrats", "recog_valid": false, "glyph_recog_text": "Congrats", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[185, 284], [175, 311], [208, 314], [208, 293]], "text": "&", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "&", "recog_valid": false, "glyph_recog_text": "&.amp", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[106, 249], [98, 282], [232, 294], [233, 249]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bicttday", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[72, 212], [68, 249], [147, 255], [163, 228]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Happ", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062756.jpg", "caption": "mercedes-benz sprinter van", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324901.jpg", "caption": "a blue truck with a license plate on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062763.jpg", "caption": "two women standing in a kitchen looking at a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324909.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324915.jpg", "caption": "a person is kite surfing in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456003.jpg", "caption": "a dog sitting on a pink chair with two books", "annotations": [{"polygon": [[62, 264], [98, 253], [110, 249], [123, 246], [132, 245], [147, 241], [165, 240], [193, 239], [193, 257], [159, 258], [131, 261], [88, 271], [64, 282]], "text": "MATILDA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MATILDA", "recog_valid": true, "glyph_recog_text": "MATILDA", "glyph_recog_ld": 1.0}, {"polygon": [[400, 299], [407, 301], [443, 317], [452, 319], [453, 317], [448, 314], [450, 308], [397, 286], [396, 289], [398, 292], [396, 296]], "text": "Todd", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Toclel", "recog_valid": false, "glyph_recog_text": "Ya5d", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[390, 271], [379, 278], [389, 283], [397, 283], [426, 297], [454, 311], [474, 319], [476, 313], [475, 306], [400, 274]], "text": "Sweeney Todd", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Sweeney", "recog_valid": false, "glyph_recog_text": "Bweeney Todd", "glyph_recog_ld": 0.5000004166663194}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456009.jpg", "caption": "a keyboard and mouse are sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193867.jpg", "caption": "a view of a city street with cars and people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193879.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193902.jpg", "caption": "a group of people skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000324989.jpg", "caption": "a blender with green smoothie in it", "annotations": [{"polygon": [[250, 340], [251, 350], [267, 344], [267, 344], [277, 340], [287, 334], [300, 325], [299, 319], [294, 321], [275, 331]], "text": "Cuisinart", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cuisinart", "recog_valid": true, "glyph_recog_text": "Cuasnanm", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000193923.jpg", "caption": "a stop sign and a pedestrian crossing sign", "annotations": [{"polygon": [[341, 105], [330, 170], [280, 160], [300, 98]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "号", "recog_valid": false, "glyph_recog_text": "d01S", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062864.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[90, 152], [90, 152], [219, 109], [265, 113], [271, 117], [284, 185], [278, 187], [248, 173], [217, 181], [117, 217]], "text": "DONT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OOM", "recog_valid": false, "glyph_recog_text": "DONT", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[76, 253], [76, 253], [98, 325], [111, 332], [128, 335], [198, 310], [264, 293], [314, 275], [337, 221], [336, 203], [329, 189], [312, 180], [276, 190], [226, 204], [155, 228], [96, 244]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[145, 346], [151, 338], [172, 326], [194, 313], [218, 309], [220, 315], [229, 356], [215, 391], [212, 392], [196, 382], [170, 397], [160, 374]], "text": "IT'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "75", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[262, 287], [262, 287], [290, 283], [333, 286], [346, 342], [316, 348], [277, 357], [265, 355], [251, 340], [243, 320], [246, 307], [252, 300]], "text": "OK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "OK", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456083.jpg", "caption": "a steam engine train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325012.jpg", "caption": "a display case with several pizzas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325042.jpg", "caption": "a man riding a bike with a dog on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194000.jpg", "caption": "a group of people holding up signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062941.jpg", "caption": "united airlines boeing 737-800 nr 834", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325095.jpg", "caption": "a cow is standing in a pen with people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062958.jpg", "caption": "a group of police officers standing on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000062964.jpg", "caption": "a train is parked at a station with a building in the background", "annotations": [{"polygon": [[445, 186], [444, 216], [477, 212], [478, 182]], "text": "3442", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "3442", "recog_valid": true, "glyph_recog_text": "3442", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325115.jpg", "caption": "a man jumping over a pole in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325132.jpg", "caption": "a large jetliner is landing on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325175.jpg", "caption": "a black and white sign", "annotations": [{"polygon": [[242, 124], [334, 158], [335, 169], [319, 205], [286, 192], [223, 166], [222, 156], [228, 135], [237, 123]], "text": "CROSS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROSS", "recog_valid": true, "glyph_recog_text": "CROSS", "glyph_recog_ld": 1.0}, {"polygon": [[356, 167], [352, 173], [358, 177], [342, 214], [472, 269], [480, 262], [496, 229], [492, 219]], "text": "TRAFFIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "TRAFFIC", "recog_valid": true, "glyph_recog_text": "TRAFFIC", "glyph_recog_ld": 1.0}, {"polygon": [[166, 297], [233, 334], [239, 332], [242, 326], [251, 301], [253, 293], [251, 290], [182, 258]], "text": "DOES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOES", "recog_valid": true, "glyph_recog_text": "DOES", "glyph_recog_ld": 1.0}, {"polygon": [[275, 302], [257, 343], [309, 372], [325, 335], [330, 337], [332, 331], [308, 317], [276, 304]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOT", "recog_valid": true, "glyph_recog_text": "NOT", "glyph_recog_ld": 1.0}, {"polygon": [[337, 387], [402, 420], [412, 400], [418, 401], [427, 399], [431, 392], [431, 381], [429, 376], [357, 342], [350, 342], [348, 344], [333, 379]], "text": "STO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": false, "glyph_recog_text": "STO", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063049.jpg", "caption": "a double decker bus with a man on the side", "annotations": [{"polygon": [[54, 198], [48, 257], [100, 244], [100, 194]], "text": "Speare", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "复", "recog_valid": false, "glyph_recog_text": "oae", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[26, 203], [15, 258], [47, 260], [53, 212]], "text": "Larry", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "营", "recog_valid": false, "glyph_recog_text": "Aue", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325220.jpg", "caption": "a busy city street at night with people walking and driving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063079.jpg", "caption": "a parking meter with a colorful scarf wrapped around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456343.jpg", "caption": "a man is windsurfing in a flooded park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194200.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456350.jpg", "caption": "a clock tower with a santa clause on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456351.jpg", "caption": "a stop sign with a pedestrian crossing sign", "annotations": [{"polygon": [[193, 134], [356, 97], [352, 46], [193, 89]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194262.jpg", "caption": "a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456412.jpg", "caption": "a living room with a flat screen tv and a fan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456416.jpg", "caption": "a group of buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063206.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194294.jpg", "caption": "a woman standing in front of a picnic table with a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063230.jpg", "caption": "a restaurant with tables and chairs outside", "annotations": [{"polygon": [[390, 88], [421, 64], [435, 64], [435, 68], [393, 105]], "text": "CAFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "CA2", "recog_valid": false, "glyph_recog_text": "CAFE", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[374, 118], [453, 63], [468, 107], [378, 158]], "text": "CREPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "CREPE", "recog_valid": true, "glyph_recog_text": "CREPE", "glyph_recog_ld": 1.0}, {"polygon": [[252, 165], [253, 190], [253, 192], [296, 173], [297, 141]], "text": "CRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CRE", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063252.jpg", "caption": "no nice house", "annotations": [{"polygon": [[132, 343], [191, 343], [186, 383], [127, 381]], "text": "HO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OH", "recog_valid": false, "glyph_recog_text": "HO", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[271, 346], [355, 349], [356, 387], [270, 383]], "text": "PIACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "350J", "recog_valid": false, "glyph_recog_text": "PIACE", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[367, 348], [408, 347], [413, 388], [368, 388]], "text": "ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ON", "recog_valid": true, "glyph_recog_text": "ON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063263.jpg", "caption": "a boy running to first base", "annotations": [{"polygon": [[4, 315], [5, 309], [7, 309], [11, 308], [13, 305], [18, 302], [24, 305], [29, 307], [31, 314], [33, 319], [37, 322], [41, 332], [40, 339], [27, 343], [19, 339], [15, 334], [21, 330], [26, 333], [30, 333], [32, 332], [28, 326], [25, 326], [20, 318], [23, 317], [24, 316], [20, 311], [14, 313], [15, 317], [7, 322]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[155, 239], [165, 230], [166, 227], [168, 224], [174, 226], [175, 224], [175, 220], [177, 221], [179, 222], [180, 220], [184, 220], [185, 220], [189, 219], [194, 226], [158, 260]], "text": "Athletic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "t", "recog_valid": false, "glyph_recog_text": "Athlatic", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[317, 188], [323, 203], [334, 201], [341, 200], [350, 195], [361, 189], [370, 185], [371, 182], [369, 176], [367, 173], [358, 172], [353, 170], [342, 175], [334, 178], [327, 184]], "text": "Athletic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sRlet", "recog_valid": false, "glyph_recog_text": "Athletic", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063270.jpg", "caption": "a tennis racket is hanging on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194346.jpg", "caption": "a man on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063275.jpg", "caption": "a large room with a lot of toilets and a lot of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456499.jpg", "caption": "a woman is standing on the sidewalk waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456500.jpg", "caption": "a woman smiling at a cake with a monkey on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456505.jpg", "caption": "two police officers on motorcycles on a rainy day", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325434.jpg", "caption": "a yellow and blue train sitting in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325444.jpg", "caption": "a street light with green banners hanging from it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063306.jpg", "caption": "a series of photos of a skateboarder doing tricks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456522.jpg", "caption": "a bride and groom walk down the aisle at a wedding ceremony", "annotations": [{"polygon": [[133, 401], [142, 382], [88, 384], [85, 378], [64, 381], [67, 401], [90, 399], [83, 408], [89, 408], [94, 400]], "text": "Captured", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Cmed", "recog_valid": false, "glyph_recog_text": "Captured", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456545.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[316, 69], [316, 100], [389, 98], [389, 68]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063334.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325487.jpg", "caption": "a baseball game in progress with a pitcher throwing the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325530.jpg", "caption": "a baseball field with a baseball player on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194461.jpg", "caption": "a street with a lot of signs and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456606.jpg", "caption": "a cup of tea and a notebook on a table", "annotations": [{"polygon": [[9, 266], [139, 312], [142, 306], [12, 260]], "text": "KAPILLARZONENELEKTROPHORESE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KAEULARZONNEHEKTROPHORESE", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.999998400283644e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325545.jpg", "caption": "a cutting board with carrots, onions, garlic, and a knife", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325548.jpg", "caption": "a bag of oranges and a bag of pears", "annotations": [{"polygon": [[378, 364], [388, 385], [415, 374], [401, 354]], "text": "SPB", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SPB", "recog_valid": true, "glyph_recog_text": "SPB", "glyph_recog_ld": 1.0}, {"polygon": [[266, 173], [275, 185], [329, 153], [319, 147]], "text": "500 103", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CO1 005 0", "recog_valid": false, "glyph_recog_text": "500103", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456629.jpg", "caption": "united airlines plane at the gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063421.jpg", "caption": "a motorcycle with a side car parked next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456640.jpg", "caption": "two women in blue and white soccer uniforms playing soccer", "annotations": [{"polygon": [[476, 181], [476, 180], [476, 223], [513, 223], [512, 176], [489, 173]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "c", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[316, 246], [313, 297], [506, 290], [504, 246]], "text": "TORADE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "YTORADE", "recog_valid": false, "glyph_recog_text": "TORADE", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[0, 260], [0, 308], [145, 303], [142, 255]], "text": "ATOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ATORG", "recog_valid": false, "glyph_recog_text": "ATOR", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063426.jpg", "caption": "a man in a white shirt and shorts playing tennis", "annotations": [{"polygon": [[120, 335], [120, 335], [200, 336], [201, 399], [117, 397]], "text": "IIBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IEM", "recog_valid": false, "glyph_recog_text": "IIBM", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[242, 242], [344, 164], [352, 186], [333, 203], [304, 232], [272, 256], [259, 269]], "text": "SPORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPORT", "recog_valid": true, "glyph_recog_text": "SPORT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456648.jpg", "caption": "a man on a motorcycle riding down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325590.jpg", "caption": "a blue and white motorcycle parked on the side of the road", "annotations": [{"polygon": [[30, 328], [0, 348], [0, 356], [31, 360], [43, 354], [55, 331]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "s", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063446.jpg", "caption": "a car is parked in front of a wall with a sign has texts", "annotations": [{"polygon": [[16, 131], [49, 131], [57, 138], [57, 157], [51, 167], [15, 166], [11, 164], [13, 131]], "text": "50", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "50", "recog_valid": true, "glyph_recog_text": "50", "glyph_recog_ld": 1.0}, {"polygon": [[98, 134], [95, 168], [139, 166], [144, 157], [140, 147], [132, 145], [133, 139], [139, 132], [105, 130]], "text": "26", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "26", "recog_valid": true, "glyph_recog_text": "26", "glyph_recog_ld": 1.0}, {"polygon": [[272, 154], [275, 194], [338, 192], [340, 179], [338, 156]], "text": "95", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "95", "recog_valid": true, "glyph_recog_text": "95", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456666.jpg", "caption": "a mouse and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194538.jpg", "caption": "a close up of a stuffed animal with a tag on it", "annotations": [{"polygon": [[376, 297], [385, 358], [476, 338], [434, 275]], "text": "TOYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "架", "recog_valid": false, "glyph_recog_text": "TOYS", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[286, 379], [286, 379], [256, 408], [286, 412], [292, 380]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "<", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194542.jpg", "caption": "a red train is traveling on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063473.jpg", "caption": "a steam train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063495.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[262, 167], [264, 176], [363, 130], [365, 126], [362, 120], [360, 120]], "text": "FLINDERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLINDERS", "recog_valid": true, "glyph_recog_text": "FLENDE男器", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[301, 160], [302, 170], [422, 115], [416, 106]], "text": "FRANKLIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRANKLIN", "recog_valid": true, "glyph_recog_text": "中RAUxLIN", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[266, 197], [267, 206], [313, 186], [310, 176]], "text": "PILGRIM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PILGRIM", "recog_valid": true, "glyph_recog_text": "POH", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[314, 173], [316, 182], [365, 161], [361, 151]], "text": "UNITING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNITING", "recog_valid": true, "glyph_recog_text": "HNTNO", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[290, 196], [292, 207], [342, 184], [338, 173]], "text": "CHURCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHURCH", "recog_valid": true, "glyph_recog_text": "CHURCH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325654.jpg", "caption": "a donut with a fork and a bottle of beer", "annotations": [{"polygon": [[370, 109], [364, 125], [383, 137], [380, 139], [401, 144], [401, 140], [423, 139], [430, 126], [412, 129], [401, 129], [390, 125], [384, 121]], "text": "Pepper", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Peppe", "recog_valid": false, "glyph_recog_text": "Peppe", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456761.jpg", "caption": "a box of donuts and a cake sitting on a counter", "annotations": [{"polygon": [[1, 272], [7, 304], [277, 326], [280, 293]], "text": "DUNKIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DIULHLY", "recog_valid": false, "glyph_recog_text": "DUNKIN", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325710.jpg", "caption": "a man sitting on a bench eating a hot dog", "annotations": [{"polygon": [[140, 421], [142, 434], [160, 428], [162, 423], [192, 408], [184, 393], [155, 410]], "text": "CWRA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "GURA", "recog_valid": false, "glyph_recog_text": "CWRA", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194676.jpg", "caption": "a stop sign and a traffic light on a street", "annotations": [{"polygon": [[327, 79], [376, 66], [385, 52], [381, 44], [326, 60]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[323, 137], [362, 129], [365, 147], [322, 159]], "text": "RED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RED", "recog_valid": true, "glyph_recog_text": "RED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194698.jpg", "caption": "a small dog laying on a man's arm", "annotations": [{"polygon": [[116, 117], [183, 118], [184, 157], [118, 157]], "text": "U.S.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "U.S", "recog_valid": false, "glyph_recog_text": "U.S.", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[204, 119], [227, 121], [249, 126], [274, 123], [280, 158], [266, 159], [242, 155], [214, 156], [206, 155]], "text": "OUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUT", "recog_valid": true, "glyph_recog_text": "OUT", "glyph_recog_ld": 1.0}, {"polygon": [[287, 120], [328, 110], [334, 145], [292, 154]], "text": "OF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OF", "recog_valid": true, "glyph_recog_text": "OF", "glyph_recog_ld": 1.0}, {"polygon": [[102, 158], [148, 158], [192, 158], [221, 158], [238, 156], [287, 159], [316, 153], [346, 144], [355, 184], [306, 188], [277, 190], [235, 191], [173, 193], [102, 198]], "text": "U.S.OUT OF CALIFORNIA ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CALIFORN", "recog_valid": false, "glyph_recog_text": "U.S.OUT OF CALIFORNIA", "glyph_recog_ld": 0.380952675736821}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325775.jpg", "caption": "turkish airlines boeing 777-300ER", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194707.jpg", "caption": "a fishing boat is docked in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325786.jpg", "caption": "a380 el gigante del aeropuerto", "annotations": [{"polygon": [[312, 425], [330, 389], [440, 388], [449, 393], [455, 407], [452, 420], [442, 427], [346, 427]], "text": "A380", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "A380", "recog_valid": true, "glyph_recog_text": "A380", "glyph_recog_ld": 1.0}, {"polygon": [[418, 443], [419, 474], [485, 473], [481, 445]], "text": "DEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "DEL", "recog_valid": true, "glyph_recog_text": "DEL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325793.jpg", "caption": "a polar bear eating out of a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194726.jpg", "caption": "a street sign with a picture of a person on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194738.jpg", "caption": "a woman is holding a plate of food while a man is eating", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456886.jpg", "caption": "a street with many shops and people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063681.jpg", "caption": "a group of people in colorful costumes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456900.jpg", "caption": "a bird cage with a lizard in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000456936.jpg", "caption": "two women standing in a mall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325903.jpg", "caption": "a trophy on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000194879.jpg", "caption": "a man in a suit and tie standing in front of a display of items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325956.jpg", "caption": "a skateboarder in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457033.jpg", "caption": "a woman is leading a horse around an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063818.jpg", "caption": "a group of elephants are walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457035.jpg", "caption": "a large truck hauling a boat down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063820.jpg", "caption": "two canadian air canada planes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000325966.jpg", "caption": "a man and woman sitting on a bench with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063845.jpg", "caption": "a table with cupcakes and a birthday cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457067.jpg", "caption": "sony ericsson k800i - samsung galaxy s3", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063860.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063874.jpg", "caption": "a white train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457095.jpg", "caption": "a person using an electric mixer to make a sauce", "annotations": [{"polygon": [[166, 86], [169, 95], [192, 88], [207, 81], [218, 74], [218, 65], [208, 70], [203, 75], [190, 79], [168, 85]], "text": "toastmaster", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "toastmaster", "recog_valid": true, "glyph_recog_text": "LOSEA", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063879.jpg", "caption": "a desk with a clock, pens, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063881.jpg", "caption": "a young boy kicking a soccer ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457121.jpg", "caption": "a pizza shop with a sign has texts", "annotations": [{"polygon": [[115, 104], [114, 130], [180, 124], [176, 96]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PIZZA", "recog_valid": true, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 1.0}, {"polygon": [[248, 140], [246, 166], [348, 193], [345, 172]], "text": "ESPRESSA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ESPRISSA", "recog_valid": false, "glyph_recog_text": "ESPRESSA", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[23, 232], [22, 275], [92, 277], [92, 228]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PIZZA", "recog_valid": true, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 1.0}, {"polygon": [[195, 123], [193, 150], [238, 162], [238, 137]], "text": "ZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZA", "recog_valid": true, "glyph_recog_text": "ZA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457146.jpg", "caption": "a bed with a qr code on it and a love love love bedding", "annotations": [{"polygon": [[17, 234], [20, 206], [137, 219], [137, 244]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[86, 406], [88, 366], [-1, 364], [0, 405]], "text": "YE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "UE", "recog_valid": false, "glyph_recog_text": "Y E", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[17, 450], [17, 499], [73, 498], [73, 448]], "text": "90", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "90", "recog_valid": true, "glyph_recog_text": "90", "glyph_recog_ld": 1.0}, {"polygon": [[135, 413], [135, 460], [152, 473], [376, 480], [370, 417]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LOUE", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[497, 249], [506, 210], [488, 200], [374, 154], [358, 190]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "307", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[413, 8], [408, 25], [449, 38], [457, 23]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[280, 18], [274, 38], [327, 52], [332, 34]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[400, 52], [392, 73], [432, 84], [439, 65]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LOV", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[336, 34], [330, 54], [392, 71], [395, 52]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[373, 21], [370, 40], [432, 57], [436, 39]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[310, 2], [306, 21], [368, 41], [371, 21]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[361, 67], [358, 86], [418, 103], [421, 84]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[299, 49], [294, 67], [354, 84], [358, 66]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOUE", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[323, 79], [320, 98], [380, 116], [383, 99]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}, {"polygon": [[350, 111], [345, 128], [406, 148], [412, 130]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LOUE", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[287, 93], [284, 113], [343, 127], [347, 109]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LOUE", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457149.jpg", "caption": "a person on a snowboard doing a trick on a ramp", "annotations": [{"polygon": [[222, 300], [297, 283], [298, 291], [223, 350]], "text": "Birch", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRe", "recog_valid": false, "glyph_recog_text": "Birch", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[225, 348], [290, 299], [306, 339], [227, 352]], "text": "HILL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "07", "recog_valid": false, "glyph_recog_text": "HiLL", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[422, 303], [468, 261], [481, 295], [426, 312]], "text": "HILL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "eac", "recog_valid": false, "glyph_recog_text": "HILL", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[421, 303], [474, 256], [472, 248], [415, 262]], "text": "BIRCH HILL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BTRO", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000063943.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457168.jpg", "caption": "a group of people standing around a table with plates of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326098.jpg", "caption": "a desk with a computer and a chair in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326108.jpg", "caption": "a man in a white shirt and a man in a yellow vest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326113.jpg", "caption": "a woman sitting on a lifeguard chair with a purple umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326124.jpg", "caption": "a woman holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195058.jpg", "caption": "a living room with a fireplace and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195062.jpg", "caption": "a little girl laying on a bed with a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457219.jpg", "caption": "a door on a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064009.jpg", "caption": "a store with a sign that says quick grocery", "annotations": [{"polygon": [[196, 68], [271, 107], [277, 117], [274, 131], [203, 102], [189, 90], [186, 82], [189, 70]], "text": "quick", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Quick", "recog_valid": false, "glyph_recog_text": "quick", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[95, 83], [92, 109], [96, 115], [104, 124], [365, 214], [368, 208], [366, 192], [362, 187], [325, 170], [250, 138], [155, 98], [104, 78]], "text": "GROCERIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GROCERIES", "recog_valid": true, "glyph_recog_text": "GROCERIES", "glyph_recog_ld": 1.0}, {"polygon": [[86, 174], [126, 184], [124, 204], [85, 194]], "text": "NJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ONI", "recog_valid": false, "glyph_recog_text": "NJ", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[87, 144], [122, 155], [121, 176], [85, 162]], "text": "NJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ONJ", "recog_valid": false, "glyph_recog_text": "NJ", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457227.jpg", "caption": "a man standing on a rocky beach holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064032.jpg", "caption": "a train is pulling into a station with a light on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064036.jpg", "caption": "two soccer players are playing on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326207.jpg", "caption": "a green motorcycle parked on the side of the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326210.jpg", "caption": "a desk with two computers and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326224.jpg", "caption": "a man and a dog sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064086.jpg", "caption": "a rusty stop sign", "annotations": [{"polygon": [[139, 202], [368, 199], [386, 219], [386, 243], [379, 258], [356, 267], [344, 267], [339, 311], [142, 315], [128, 304], [120, 283], [123, 228], [130, 212]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326232.jpg", "caption": "a horse is walking through a field of trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064139.jpg", "caption": "a white apple mouse and keyboard sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064141.jpg", "caption": "a large pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326290.jpg", "caption": "a scooter is parked in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064170.jpg", "caption": "a man on a motorcycle with a box on his back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457387.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[203, 329], [205, 359], [251, 358], [247, 327]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1.", "recog_valid": false, "glyph_recog_text": "13", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326320.jpg", "caption": "a man standing next to a pay phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064202.jpg", "caption": "three people on skis stand on top of a snow covered hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326350.jpg", "caption": "a group of baseball players standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326354.jpg", "caption": "a baseball player is running to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457427.jpg", "caption": "a skateboarder in the air performing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195288.jpg", "caption": "a computer monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195316.jpg", "caption": "a man and a child on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064270.jpg", "caption": "a black and white photo of people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457503.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326442.jpg", "caption": "a street sign in a flooded area with a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457519.jpg", "caption": "a street light with a traffic light and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064308.jpg", "caption": "a cubicle with a computer and a phone on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064317.jpg", "caption": "a large airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195391.jpg", "caption": "a group of people sitting on a large purple couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457555.jpg", "caption": "a truck with a white cover on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064350.jpg", "caption": "a woman eating a sandwich at a restaurant", "annotations": [{"polygon": [[1, 406], [60, 352], [79, 373], [1, 446]], "text": "EBB,", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "EBB", "recog_valid": false, "glyph_recog_text": "BB,", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326496.jpg", "caption": "a man in a tie and shirt walking down the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064356.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326499.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457583.jpg", "caption": "a green and white double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064389.jpg", "caption": "a cat standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064411.jpg", "caption": "a street sign with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195484.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[406, 189], [354, 219], [363, 232], [399, 237], [420, 227], [431, 205]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[0, 239], [-1, 268], [75, 268], [77, 238]], "text": "NGO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NGO", "recog_valid": true, "glyph_recog_text": "NGO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326583.jpg", "caption": "a red bus is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326594.jpg", "caption": "apple crumble in a baking dish with apples and applesauce", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326613.jpg", "caption": "a sign that says carral closed at expo to cyclists", "annotations": [{"polygon": [[195, 224], [194, 254], [336, 292], [329, 267]], "text": "CARRALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CARRALL", "recog_valid": true, "glyph_recog_text": "CARRALL", "glyph_recog_ld": 1.0}, {"polygon": [[173, 255], [169, 286], [307, 318], [304, 291]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}, {"polygon": [[324, 296], [321, 322], [352, 327], [350, 302]], "text": "AT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AT", "recog_valid": true, "glyph_recog_text": "AT", "glyph_recog_ld": 1.0}, {"polygon": [[196, 302], [194, 333], [287, 351], [287, 322]], "text": "EXPO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EXPO", "recog_valid": true, "glyph_recog_text": "EXPO", "glyph_recog_ld": 1.0}, {"polygon": [[301, 325], [304, 354], [342, 364], [340, 337]], "text": "TO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TO", "recog_valid": true, "glyph_recog_text": "TO", "glyph_recog_ld": 1.0}, {"polygon": [[194, 347], [190, 377], [354, 405], [349, 377]], "text": "CYCLISTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CYCLISTS", "recog_valid": true, "glyph_recog_text": "CYCLISTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064492.jpg", "caption": "a woman in green shirt eating breakfast at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457724.jpg", "caption": "a group of people standing around a plane", "annotations": [{"polygon": [[299, 154], [397, 176], [390, 190], [290, 167], [290, 166]], "text": "Continental", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Continental", "recog_valid": true, "glyph_recog_text": "Continental", "glyph_recog_ld": 1.0}, {"polygon": [[403, 175], [497, 197], [496, 205], [491, 214], [393, 190]], "text": "Connection", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Connection", "recog_valid": true, "glyph_recog_text": "Connection", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064534.jpg", "caption": "a baseball player standing on a field", "annotations": [{"polygon": [[63, 118], [66, 156], [109, 157], [107, 117]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}, {"polygon": [[29, 246], [71, 246], [73, 210], [25, 209]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}, {"polygon": [[92, 245], [126, 247], [135, 209], [87, 209]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "10", "glyph_recog_ld": 1.0}, {"polygon": [[164, 260], [220, 269], [225, 244], [162, 227]], "text": "Phillie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Phill", "recog_valid": false, "glyph_recog_text": "Phillie", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326706.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457781.jpg", "caption": "two women playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457791.jpg", "caption": "a cat laying on a person's lap", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457805.jpg", "caption": "a person on a dirt bike in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326750.jpg", "caption": "a train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457822.jpg", "caption": "a man is catching a frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457824.jpg", "caption": "a sign that says stop the drop", "annotations": [{"polygon": [[166, 93], [166, 146], [363, 146], [358, 97]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[153, 153], [152, 189], [241, 184], [240, 155]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[255, 153], [255, 189], [368, 187], [368, 155]], "text": "DROP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DROP", "recog_valid": true, "glyph_recog_text": "DROP", "glyph_recog_ld": 1.0}, {"polygon": [[150, 261], [148, 334], [372, 332], [377, 315], [377, 278], [368, 261]], "text": "1080", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1080", "recog_valid": true, "glyph_recog_text": "1080", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064627.jpg", "caption": "a group of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457860.jpg", "caption": "a vase with yellow flowers on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195724.jpg", "caption": "a table with a sandwich and coffee on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064681.jpg", "caption": "a person is eating a pizza with a knife and fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457904.jpg", "caption": "a white truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326832.jpg", "caption": "a baseball player swinging at a ball during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064704.jpg", "caption": "a refrigerator with a lot of food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457922.jpg", "caption": "a baseball player is standing at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195797.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064730.jpg", "caption": "a man standing next to a food stand with a kite flying in the background", "annotations": [{"polygon": [[127, 473], [127, 484], [142, 486], [152, 488], [161, 492], [175, 498], [182, 505], [183, 488], [168, 477], [149, 471], [131, 469]], "text": "PRETZELS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "RET2", "recog_valid": false, "glyph_recog_text": "PRETZELS", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000457948.jpg", "caption": "a bus is driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195810.jpg", "caption": "a blue bus parked on the side of the road", "annotations": [{"polygon": [[120, 61], [254, 61], [257, 116], [128, 116]], "text": "184", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "184", "recog_valid": true, "glyph_recog_text": "184", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195812.jpg", "caption": "a qantas airplane flying in the sky with its landing gear down", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000326891.jpg", "caption": "a model of a fire truck on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195826.jpg", "caption": "stop the hate repeal prop 8", "annotations": [{"polygon": [[171, 205], [244, 212], [245, 189], [173, 182]], "text": "HATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HATE", "recog_valid": true, "glyph_recog_text": "HATE", "glyph_recog_ld": 1.0}, {"polygon": [[264, 210], [267, 243], [302, 246], [304, 219]], "text": "H8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H8", "recog_valid": true, "glyph_recog_text": "H8", "glyph_recog_ld": 1.0}, {"polygon": [[104, 201], [103, 233], [146, 231], [141, 200]], "text": "H8", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HB", "recog_valid": false, "glyph_recog_text": "H8", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064772.jpg", "caption": "a man sitting on the ground reading a newspaper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195860.jpg", "caption": "a stop sign with a street sign on it", "annotations": [{"polygon": [[328, 161], [339, 156], [344, 165], [356, 161], [357, 145], [368, 141], [368, 155], [414, 138], [405, 177], [399, 177], [398, 169], [327, 196]], "text": "Railway", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Railiway", "recog_valid": false, "glyph_recog_text": "Railway", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[278, 201], [318, 207], [318, 224], [291, 220], [291, 244], [268, 240], [265, 222], [267, 203]], "text": "8TH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8TH", "recog_valid": true, "glyph_recog_text": "8TH", "glyph_recog_ld": 1.0}, {"polygon": [[325, 244], [327, 215], [341, 214], [355, 218], [374, 229], [422, 236], [431, 228], [436, 228], [437, 237], [440, 239], [438, 244], [438, 259], [438, 266], [324, 247]], "text": "Street", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Street", "recog_valid": true, "glyph_recog_text": "Street", "glyph_recog_ld": 1.0}, {"polygon": [[293, 390], [382, 375], [390, 382], [392, 406], [388, 430], [377, 434], [373, 447], [299, 448], [286, 424], [288, 396]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064800.jpg", "caption": "a street with cars parked on it and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458016.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195880.jpg", "caption": "a man in a blue shirt and white shorts is about to hit a tennis ball", "annotations": [{"polygon": [[53, 6], [52, 61], [110, 68], [137, 62], [171, 64], [185, 61], [184, 24]], "text": "PARI", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "PARI", "recog_valid": true, "glyph_recog_text": "PARI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458025.jpg", "caption": "three dirt bikes racing on a sandy beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458027.jpg", "caption": "a sheep is in a parade with a banner that reads raddall sheep care", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195886.jpg", "caption": "a cat looking at itself in a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064816.jpg", "caption": "a man standing next to a statue of a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195903.jpg", "caption": "a man sitting at a table with a sandwich", "annotations": [{"polygon": [[79, 210], [86, 233], [97, 224], [105, 217], [113, 217], [119, 217], [117, 202], [108, 201], [98, 201], [88, 206]], "text": "Chee", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ce", "recog_valid": false, "glyph_recog_text": "Chee", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[125, 210], [129, 228], [139, 229], [144, 219], [150, 215], [159, 213], [160, 203], [160, 195], [154, 194]], "text": "Cheeto", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0T", "recog_valid": false, "glyph_recog_text": "Cheato", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064834.jpg", "caption": "a laptop computer sitting on a bed with a cell phone on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458082.jpg", "caption": "a group of men in uniform standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458085.jpg", "caption": "a woman playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458089.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327020.jpg", "caption": "a street light with a traffic light and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195951.jpg", "caption": "a double decker bus on a rainy day", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195952.jpg", "caption": "a young boy is sleeping in a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064896.jpg", "caption": "a computer mouse sitting on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064899.jpg", "caption": "a dog and a pig walking on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195978.jpg", "caption": "two men playing tennis on a clay court", "annotations": [{"polygon": [[425, 95], [427, 50], [389, 35], [234, 36], [191, 60], [192, 97]], "text": "airberlin", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "oiNberlin", "recog_valid": false, "glyph_recog_text": "airberlin", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064908.jpg", "caption": "a large airplane parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195993.jpg", "caption": "a red and white sign", "annotations": [{"polygon": [[45, 167], [205, 206], [208, 174], [51, 137]], "text": "CONGRESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CONGRESS", "recog_valid": true, "glyph_recog_text": "CONGRESS", "glyph_recog_ld": 1.0}, {"polygon": [[324, 274], [419, 202], [416, 168], [324, 241]], "text": "BARNARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BARNARD", "recog_valid": true, "glyph_recog_text": "BARNARD", "glyph_recog_ld": 1.0}, {"polygon": [[263, 396], [342, 399], [342, 372], [258, 367]], "text": "YIELD", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "YIELD", "recog_valid": true, "glyph_recog_text": "YIELD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000195998.jpg", "caption": "a man holding an orange under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196002.jpg", "caption": "a pink donut sitting on the steering wheel of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458160.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196018.jpg", "caption": "a table with a variety of food items on it", "annotations": [{"polygon": [[318, 464], [320, 494], [417, 491], [412, 467]], "text": "Petitplat", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Petitplat", "recog_valid": true, "glyph_recog_text": "Petitplat", "glyph_recog_ld": 1.0}, {"polygon": [[417, 467], [422, 500], [451, 499], [447, 470]], "text": "BY", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "oy", "recog_valid": false, "glyph_recog_text": "B", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000064954.jpg", "caption": "a wedding cake and flowers on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327105.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[310, 225], [403, 219], [404, 259], [310, 263]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458189.jpg", "caption": "a stop sign with a sign has texts", "annotations": [{"polygon": [[213, 149], [211, 180], [303, 203], [304, 174]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327132.jpg", "caption": "two pizzas in a box sitting on the grass", "annotations": [{"polygon": [[397, 384], [448, 353], [449, 362], [397, 396]], "text": "copen hagen", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "COPENTHAGEN", "recog_valid": false, "glyph_recog_text": "海", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458205.jpg", "caption": "a large jet airplane sitting on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196075.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[318, 174], [323, 150], [299, 139], [225, 128], [225, 167]], "text": "rioles", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "gnloles", "recog_valid": false, "glyph_recog_text": "rioles", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065011.jpg", "caption": "two men standing next to each other on a street", "annotations": [{"polygon": [[255, 79], [255, 103], [327, 113], [327, 89]], "text": "INSPECTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ISPESTEON", "recog_valid": false, "glyph_recog_text": "INSPECTION", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458230.jpg", "caption": "a young girl playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196090.jpg", "caption": "a street with a sign has texts", "annotations": [{"polygon": [[35, 99], [40, 116], [54, 111], [71, 97], [94, 91], [123, 98], [141, 116], [156, 94], [118, 74], [96, 72], [70, 78]], "text": "CRAZY DAV'S INTERNATIONAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458249.jpg", "caption": "a pizza on a tray with a fork and a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196108.jpg", "caption": "a woman in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327198.jpg", "caption": "a desk with a computer, keyboard, mouse, and a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196134.jpg", "caption": "a traffic light with no turn signal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327209.jpg", "caption": "three people standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458286.jpg", "caption": "a computer desk with a computer monitor and keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065072.jpg", "caption": "a large elk standing in front of a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327221.jpg", "caption": "a snowboarder is riding down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196178.jpg", "caption": "two people sitting on a bench with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458326.jpg", "caption": "a bowl and two remotes on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327258.jpg", "caption": "two police officers on horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196198.jpg", "caption": "a toilet with a mosaic cover on it", "annotations": [{"polygon": [[223, 121], [217, 199], [393, 217], [419, 160], [407, 138]], "text": "POOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "POOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458349.jpg", "caption": "a blue table with candles and a cake on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065136.jpg", "caption": "a man sitting on a bench with a snowboard", "annotations": [{"polygon": [[255, 354], [210, 157], [239, 155], [284, 347]], "text": "SERRA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H8835", "recog_valid": false, "glyph_recog_text": "SERRA", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[168, 225], [152, 230], [158, 268], [186, 297], [198, 294], [193, 272]], "text": "vitro", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "omc", "recog_valid": false, "glyph_recog_text": "vitro", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327281.jpg", "caption": "a car driving on a road with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065139.jpg", "caption": "a car driving down a road with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458361.jpg", "caption": "a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065162.jpg", "caption": "a group of baseball players walking on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196245.jpg", "caption": "a police motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065179.jpg", "caption": "a child using a snow shovel to clear the road", "annotations": [{"polygon": [[116, 128], [150, 122], [149, 146], [117, 153]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327337.jpg", "caption": "a parking meter with a rusty metal pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065207.jpg", "caption": "a man sitting in front of a television watching a movie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065209.jpg", "caption": "a young man sitting on the ground with a soccer ball", "annotations": [{"polygon": [[305, 210], [304, 276], [368, 278], [369, 212]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458475.jpg", "caption": "a skier in mid air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458478.jpg", "caption": "a dog laying on the floor next to a pair of shoes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196345.jpg", "caption": "a man in a suit and tie sitting in a courtroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327432.jpg", "caption": "an old black and white photo of a street with people and vehicles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458505.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196377.jpg", "caption": "two men standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458522.jpg", "caption": "a computer desk with a keyboard, mouse and monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065325.jpg", "caption": "a stop sign and a road sign at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065329.jpg", "caption": "a person sitting in front of a computer with their feet up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327499.jpg", "caption": "a man in a wetsuit riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458617.jpg", "caption": "a plate of sandwiches and other food on a table", "annotations": [{"polygon": [[359, 124], [357, 136], [376, 137], [395, 129], [409, 121], [417, 113], [422, 107], [426, 101], [429, 90], [425, 96], [419, 103], [413, 107], [402, 115], [398, 118], [387, 122]], "text": "Welc", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Cee", "recog_valid": false, "glyph_recog_text": "Welc", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[370, 141], [370, 147], [388, 143], [402, 136], [411, 128], [416, 123], [418, 117], [414, 122], [409, 126], [403, 129], [397, 133], [392, 136], [384, 138], [378, 139]], "text": "Concord", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Wenupse", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065404.jpg", "caption": "a girl kicking a soccer ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065407.jpg", "caption": "a group of people sitting in chairs with a man writing on a piece of paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196480.jpg", "caption": "a black and white cat sitting on top of pizza boxes", "annotations": [{"polygon": [[87, 454], [98, 464], [53, 485], [44, 474]], "text": "AGES", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "AGES", "recog_valid": true, "glyph_recog_text": "AGES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327561.jpg", "caption": "three people standing in front of a sign for the ski club", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327563.jpg", "caption": "a monkey climbing on top of a machine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458637.jpg", "caption": "a snowboarder is in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458640.jpg", "caption": "delta airlines a320-2142-delta-delta-delta-airways-airlines-aircraft-aircraft-aircraft", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065425.jpg", "caption": "a black and white photo of a man holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327576.jpg", "caption": "a bed with several books on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458649.jpg", "caption": "a large airplane sitting on the tarmac", "annotations": [{"polygon": [[349, 258], [338, 288], [302, 275], [312, 251]], "text": "world", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "worlo", "recog_valid": false, "glyph_recog_text": "world", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065457.jpg", "caption": "a cat laying in a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196529.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327610.jpg", "caption": "a bus parked in a parking lot", "annotations": [{"polygon": [[454, 374], [453, 420], [508, 412], [511, 382]], "text": "Star", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Stai", "recog_valid": false, "glyph_recog_text": "Star", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327625.jpg", "caption": "a street with a motorcycle and a car driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327628.jpg", "caption": "a pole with a bunch of street signs on it", "annotations": [{"polygon": [[217, 191], [217, 179], [222, 175], [256, 165], [257, 187], [220, 196]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[277, 157], [322, 144], [318, 159], [318, 166], [282, 179]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[218, 226], [221, 218], [255, 230], [257, 253], [220, 240], [218, 234]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[134, 413], [135, 452], [212, 465], [212, 465], [210, 427]], "text": "FASHION", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FASHION", "recog_valid": true, "glyph_recog_text": "FASHION", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065518.jpg", "caption": "a baseball game is in progress with a catcher and a batter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327664.jpg", "caption": "a stop sign with a clear sky in the background", "annotations": [{"polygon": [[154, 153], [365, 287], [371, 382], [119, 266]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458748.jpg", "caption": "a cell phone sitting on a table next to a cup", "annotations": [{"polygon": [[105, 169], [157, 106], [163, 166], [118, 223]], "text": "SNP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIe", "recog_valid": false, "glyph_recog_text": "SNP", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196610.jpg", "caption": "a yellow and blue vending machine with bicycles on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458756.jpg", "caption": "a bowl of bananas and apples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458762.jpg", "caption": "two cats are playing with each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458772.jpg", "caption": "a busy intersection with cars and pedestrians", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065562.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[237, 223], [239, 262], [288, 268], [284, 226]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "25", "recog_valid": true, "glyph_recog_text": "25", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065588.jpg", "caption": "a man eating a sandwich", "annotations": [{"polygon": [[378, 355], [383, 344], [397, 352], [408, 362], [416, 372], [422, 379], [430, 384], [435, 391], [440, 398], [427, 409], [423, 401], [413, 391], [405, 382], [400, 372], [392, 365]], "text": "REBELLION.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "REBELLION", "recog_valid": false, "glyph_recog_text": "REBELLION", "glyph_recog_ld": 1.0}, {"polygon": [[300, 350], [311, 356], [316, 347], [322, 344], [338, 336], [350, 332], [342, 319], [321, 327], [308, 336]], "text": "WINDIV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EWNDII", "recog_valid": false, "glyph_recog_text": "WINDIV", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196676.jpg", "caption": "a cow standing on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327761.jpg", "caption": "a man in a suit talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458839.jpg", "caption": "a man wearing a tie and headphones", "annotations": [{"polygon": [[509, 206], [480, 219], [479, 239], [509, 228]], "text": "HOPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "390H", "recog_valid": false, "glyph_recog_text": "HOPE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196701.jpg", "caption": "wallace street sign", "annotations": [{"polygon": [[114, 250], [289, 241], [288, 268], [111, 278]], "text": "WALLACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WALLACE", "recog_valid": true, "glyph_recog_text": "WALLACE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065632.jpg", "caption": "two soccer players are fighting for the ball", "annotations": [{"polygon": [[396, 261], [360, 279], [397, 323], [408, 323], [442, 297], [441, 287], [408, 261]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327805.jpg", "caption": "a couple of people in a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327808.jpg", "caption": "a stop sign with a sign in arabic", "annotations": [{"polygon": [[183, 250], [340, 248], [342, 296], [181, 296]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327820.jpg", "caption": "a group of people standing in line at a ski resort", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196775.jpg", "caption": "a solar powered parking meter on a sidewalk", "annotations": [{"polygon": [[294, 233], [300, 233], [306, 235], [311, 237], [315, 240], [319, 246], [322, 251], [324, 260], [316, 263], [314, 255], [313, 253], [310, 249], [305, 245], [297, 245]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AERE", "recog_valid": false, "glyph_recog_text": "HERE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065705.jpg", "caption": "a double decker bus driving down a street at night", "annotations": [{"polygon": [[17, 412], [23, 432], [264, 425], [265, 402]], "text": "TAXIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TAXIS", "recog_valid": true, "glyph_recog_text": "T A X I S", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327895.jpg", "caption": "a double decker bus is parked on the side of the road", "annotations": [{"polygon": [[182, 338], [163, 379], [312, 383], [313, 335]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458970.jpg", "caption": "a train is pulling into a station with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065755.jpg", "caption": "a person riding a jet ski in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000458969.jpg", "caption": "three children sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196842.jpg", "caption": "a man and a woman riding a bike with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459003.jpg", "caption": "a group of people walking down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327935.jpg", "caption": "a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000327936.jpg", "caption": "two elephants standing next to a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196870.jpg", "caption": "a yellow door with a red bench in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459018.jpg", "caption": "a bathroom with urinals and a view of the city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459026.jpg", "caption": "a train on the tracks with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196912.jpg", "caption": "two people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196916.jpg", "caption": "an american airlines airplane is flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459078.jpg", "caption": "a baseball player walking on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459082.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196948.jpg", "caption": "a red motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459113.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459118.jpg", "caption": "a desk with a laptop, a keyboard, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196977.jpg", "caption": "a toilet sitting on a black and white checkered floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328054.jpg", "caption": "a silver and blue fire hydrant", "annotations": [{"polygon": [[196, 347], [224, 358], [254, 359], [279, 355], [293, 347], [295, 353], [289, 369], [260, 378], [226, 377], [196, 367]], "text": "KENNEDY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SENNEDY", "recog_valid": false, "glyph_recog_text": "KENNEDY", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065916.jpg", "caption": "a street sign with a one way street sign and a street sign", "annotations": [{"polygon": [[205, 219], [205, 243], [318, 265], [316, 242]], "text": "WARW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WARWY", "recog_valid": false, "glyph_recog_text": "WARW", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065924.jpg", "caption": "a street with a lot of cars and a lot of buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000196998.jpg", "caption": "a man carrying a box with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197001.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459152.jpg", "caption": "a person holding up a candy bar in front of a store", "annotations": [{"polygon": [[80, 220], [99, 221], [146, 221], [145, 253], [118, 261], [82, 255], [78, 229]], "text": "Yupi", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "topi", "recog_valid": false, "glyph_recog_text": "Yupi", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[136, 266], [141, 296], [141, 299], [173, 293], [191, 292], [220, 291], [247, 290], [282, 293], [301, 293], [324, 298], [326, 275], [335, 274], [323, 265], [311, 259], [283, 259], [247, 258], [201, 256], [168, 259], [142, 263]], "text": "HOTDOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOTDOG", "recog_valid": true, "glyph_recog_text": "HOTDOG", "glyph_recog_ld": 1.0}, {"polygon": [[377, 393], [404, 406], [443, 435], [449, 424], [409, 395], [380, 383]], "text": "HOTDOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "HOTDOC", "recog_valid": false, "glyph_recog_text": "HOTDOG", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[401, 358], [427, 370], [454, 385], [468, 396], [473, 387], [447, 368], [421, 353], [405, 345]], "text": "HOTDOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HOTDOC", "recog_valid": false, "glyph_recog_text": "HOTDOG", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065965.jpg", "caption": "a woman wearing a banana on her head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000065968.jpg", "caption": "a view of the mountains from a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459185.jpg", "caption": "a horse and rider jumping over a wooden fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328120.jpg", "caption": "a row of bicycles parked on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197068.jpg", "caption": "a cell phone sitting on a table next to a charger", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197069.jpg", "caption": "a baseball player running to home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197120.jpg", "caption": "a large airplane sitting on top of an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328216.jpg", "caption": "a train is passing by a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066082.jpg", "caption": "a pink double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197155.jpg", "caption": "a vase with flowers sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197169.jpg", "caption": "a family playing monopoly on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197195.jpg", "caption": "a poster for the movie, 'the last dance'", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066127.jpg", "caption": "airstream - graffitied - miami, fl", "annotations": [{"polygon": [[173, 149], [205, 139], [228, 134], [237, 158], [233, 166], [220, 164], [208, 163], [188, 164], [177, 169]], "text": "KENOTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "物", "recog_valid": false, "glyph_recog_text": "KENOTA", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197213.jpg", "caption": "a jockey is walking with a horse on a track", "annotations": [{"polygon": [[378, 267], [399, 265], [403, 287], [395, 300], [404, 303], [406, 314], [387, 317], [383, 298], [387, 287], [381, 285], [375, 276]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197221.jpg", "caption": "a group of kids sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328295.jpg", "caption": "a polar bear laying on its back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328298.jpg", "caption": "a plate with soup, bread, and pickles on it", "annotations": [{"polygon": [[331, 378], [339, 394], [356, 389], [373, 382], [384, 379], [393, 377], [399, 376], [408, 373], [421, 368], [412, 352]], "text": "PREMIUM ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WnIW38d", "recog_valid": false, "glyph_recog_text": "PREMIUM", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066156.jpg", "caption": "a police motorcycle is on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066164.jpg", "caption": "a dog laying on a couch with a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197243.jpg", "caption": "a man and woman in a bathroom", "annotations": [{"polygon": [[372, 55], [379, 74], [416, 62], [412, 36]], "text": "PISS", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MET", "recog_valid": false, "glyph_recog_text": "PISS", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197264.jpg", "caption": "two men standing next to each other holding skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459429.jpg", "caption": "a woman holding a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197300.jpg", "caption": "a stop sign with a hammer time sign on it", "annotations": [{"polygon": [[181, 185], [172, 210], [163, 242], [170, 260], [343, 267], [373, 232], [379, 218], [372, 194]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197307.jpg", "caption": "a tennis player is holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328382.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066246.jpg", "caption": "a man swinging a bat at a baseball", "annotations": [{"polygon": [[111, 217], [128, 229], [142, 197], [123, 182]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "L)", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459465.jpg", "caption": "two sheep standing on a wooden fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328427.jpg", "caption": "a man standing in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459515.jpg", "caption": "a pizza with mushrooms, broccoli and cheese in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197383.jpg", "caption": "a woman standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066328.jpg", "caption": "a baseball game with a coca cola sign in the background", "annotations": [{"polygon": [[237, 55], [210, 70], [224, 69], [230, 73], [239, 82], [247, 96], [250, 116], [277, 106], [267, 77], [247, 64]], "text": "Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "fooe", "recog_valid": false, "glyph_recog_text": "Cola", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328487.jpg", "caption": "a street with a bus and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459569.jpg", "caption": "a baseball player swinging a bat on a field", "annotations": [{"polygon": [[162, 90], [164, 129], [178, 129], [188, 118], [242, 115], [266, 113], [267, 86]], "text": "FIRST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FIRCT", "recog_valid": false, "glyph_recog_text": "FIRST", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[74, 94], [75, 104], [71, 126], [39, 128], [0, 130], [0, 100], [22, 100], [51, 97]], "text": "RST", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "RST", "recog_valid": true, "glyph_recog_text": "RST", "glyph_recog_ld": 1.0}, {"polygon": [[57, 65], [54, 94], [33, 94], [1, 96], [1, 65]], "text": "EAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "AN", "recog_valid": false, "glyph_recog_text": "EAN", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066366.jpg", "caption": "a baseball player is filming a game on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066388.jpg", "caption": "a chef is preparing food on a cutting board", "annotations": [{"polygon": [[327, 113], [339, 120], [356, 134], [358, 124], [342, 109], [341, 107], [328, 100]], "text": "WESTI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WESTM", "recog_valid": false, "glyph_recog_text": "WESTI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066389.jpg", "caption": "two baseball players are high fiving each other", "annotations": [{"polygon": [[98, 248], [94, 306], [128, 310], [133, 252]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "00", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066398.jpg", "caption": "a table with food and a bowl of rice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459618.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197482.jpg", "caption": "a dog walking next to a car on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459631.jpg", "caption": "a clock is reflected in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459653.jpg", "caption": "a baseball player is throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066444.jpg", "caption": "two men in red uniforms on horseback", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328588.jpg", "caption": "a person's hand on a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328606.jpg", "caption": "an old black and white photo of a steam train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197561.jpg", "caption": "a refrigerator with magnets on it and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197568.jpg", "caption": "a cat sitting on a chair watching a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459712.jpg", "caption": "a black dog laying under a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459718.jpg", "caption": "a person holding a wii remote and a nunchuck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328654.jpg", "caption": "a man standing in front of a refrigerator in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197583.jpg", "caption": "a woman holding a purple plastic box in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459731.jpg", "caption": "a train is pulling into a station at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328662.jpg", "caption": "a motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328663.jpg", "caption": "three men in ski gear posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328676.jpg", "caption": "a man cutting a cake with children around him", "annotations": [{"polygon": [[253, 234], [253, 224], [255, 216], [259, 210], [266, 204], [276, 199], [283, 195], [293, 195], [301, 197], [308, 198], [317, 204], [309, 211], [305, 215], [296, 215], [292, 215], [285, 216], [277, 222], [272, 228], [268, 235], [267, 240], [265, 245]], "text": "BREO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ABRF", "recog_valid": false, "glyph_recog_text": "BREO", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328699.jpg", "caption": "a black and white photo of people skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066566.jpg", "caption": "two bento boxes with food on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197638.jpg", "caption": "a large yellow teddy bear kite", "annotations": [{"polygon": [[262, 343], [272, 353], [246, 378], [237, 366]], "text": "ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ON", "recog_valid": true, "glyph_recog_text": "ON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066574.jpg", "caption": "a green bus driving down a street next to a building", "annotations": [{"polygon": [[509, 176], [509, 176], [510, 201], [428, 212], [427, 188]], "text": "CORA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CORA", "recog_valid": true, "glyph_recog_text": "CORA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197650.jpg", "caption": "a cow with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197651.jpg", "caption": "a man and woman posing in front of a restaurant sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066586.jpg", "caption": "a man holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197662.jpg", "caption": "a group of people cutting a cake at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066593.jpg", "caption": "a woman holding an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459819.jpg", "caption": "a white box with a wii remote in it", "annotations": [{"polygon": [[73, 186], [76, 240], [220, 239], [227, 180]], "text": "wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wii", "recog_valid": false, "glyph_recog_text": "wii", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328751.jpg", "caption": "a train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459827.jpg", "caption": "a man is walking a white horse in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197697.jpg", "caption": "a woman in a leather outfit sitting on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459848.jpg", "caption": "a train sitting on the tracks with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197713.jpg", "caption": "a man holding a hot dog with onions and peppers", "annotations": [{"polygon": [[464, 428], [472, 420], [481, 412], [489, 408], [488, 400], [481, 402], [471, 408], [460, 417], [457, 423], [461, 430]], "text": "COUNTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "COUNTY", "recog_valid": true, "glyph_recog_text": "COZNTY", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197718.jpg", "caption": "three men standing in front of a television screen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328808.jpg", "caption": "a fire hydrant with a face on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000459959.jpg", "caption": "a row of stop signs with the words stress stop on them", "annotations": [{"polygon": [[312, 156], [313, 210], [392, 201], [389, 147]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[265, 172], [261, 217], [296, 212], [290, 171]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "云", "recog_valid": false, "glyph_recog_text": "c0k", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066754.jpg", "caption": "a group of people walking down the street at night", "annotations": [{"polygon": [[-1, 133], [131, 158], [130, 185], [-1, 164]], "text": "HOSENDO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HOSENDO", "recog_valid": true, "glyph_recog_text": "HOSENDO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066755.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066767.jpg", "caption": "a bathroom with mirrors and a mosaic wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197853.jpg", "caption": "a clock tower with a weather vane on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328935.jpg", "caption": "flinders street station, melbourne, australia", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460031.jpg", "caption": "a group of people waiting for a train at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066825.jpg", "caption": "a horse made out of wood", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460045.jpg", "caption": "a large airplane with people standing around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460052.jpg", "caption": "a woman holding a wii remote in her hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000328981.jpg", "caption": "a man is petting a horse in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066858.jpg", "caption": "a large jet airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066862.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066877.jpg", "caption": "a young girl is reaching for a hot dog on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329024.jpg", "caption": "a person on a skateboard", "annotations": [{"polygon": [[441, 418], [442, 374], [512, 375], [512, 419]], "text": "R4", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "RA", "recog_valid": false, "glyph_recog_text": "R4", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460113.jpg", "caption": "a surfer in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460115.jpg", "caption": "a tennis court with a net", "annotations": [{"polygon": [[384, 131], [382, 149], [427, 164], [429, 147]], "text": "VENEIIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "VENETIAN", "recog_valid": false, "glyph_recog_text": "VENEIAN", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000197980.jpg", "caption": "a computer monitor on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329054.jpg", "caption": "a man and a woman cutting a cake with a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460134.jpg", "caption": "a woman in a blue shirt and yellow hat standing next to a table with a bunch of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000066944.jpg", "caption": "two baseball players high five after a game", "annotations": [{"polygon": [[132, 181], [124, 181], [124, 175], [130, 166], [142, 165], [156, 165], [176, 164], [178, 172], [178, 204], [171, 210], [159, 210], [133, 212], [127, 205]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "35", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329090.jpg", "caption": "a woman holding a bottle of water in her hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329107.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329124.jpg", "caption": "a stop sign with a tree in the background", "annotations": [{"polygon": [[148, 302], [150, 214], [351, 212], [347, 298]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329126.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[270, 37], [271, 69], [415, 85], [415, 59]], "text": "Steinerweg", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Sieinerweg", "recog_valid": false, "glyph_recog_text": "Steinerweg", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[141, 76], [142, 110], [235, 142], [235, 116]], "text": "Institutstr.", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Insfituisir.", "recog_valid": false, "glyph_recog_text": "Institutstr.", "glyph_recog_ld": 0.7500002083331597}, {"polygon": [[153, 222], [153, 302], [336, 305], [329, 225]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198054.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329127.jpg", "caption": "a display case filled with doughnuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329134.jpg", "caption": "a person sitting on a couch with a dog laying on their lap", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198068.jpg", "caption": "a baseball player standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198072.jpg", "caption": "a man in a helmet is reflected in a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460254.jpg", "caption": "a man playing tennis on a court", "annotations": [{"polygon": [[167, 88], [425, 96], [427, 196], [19, 195], [25, 96]], "text": "Gillette", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gillette", "recog_valid": true, "glyph_recog_text": "Gillette", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329188.jpg", "caption": "a group of buses parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460287.jpg", "caption": "a dog laying on the floor next to a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067084.jpg", "caption": "two girls playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067085.jpg", "caption": "a group of people standing around a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198169.jpg", "caption": "a woman in black and white talking to another woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067115.jpg", "caption": "three giraffes standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329276.jpg", "caption": "a group of people in boats on a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460353.jpg", "caption": "a fruit and vegetable display case in a grocery store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067143.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[68, 226], [402, 207], [407, 359], [67, 366]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[109, 396], [144, 384], [154, 397], [152, 433], [127, 433]], "text": "WJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "y", "recog_valid": false, "glyph_recog_text": "毛", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[345, 307], [357, 324], [428, 295], [418, 280]], "text": "Believin", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Believin", "recog_valid": true, "glyph_recog_text": "Bellevin", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460363.jpg", "caption": "a man in red shirt doing a skateboard trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067163.jpg", "caption": "a street sign for avenue du boulevard 6 6 division", "annotations": [{"polygon": [[164, 142], [309, 133], [314, 159], [207, 167], [161, 170], [161, 142]], "text": "DIVISION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DIVISION", "recog_valid": true, "glyph_recog_text": "DIVISION", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460389.jpg", "caption": "three horses are standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067186.jpg", "caption": "a person holding a samsung flip phone", "annotations": [{"polygon": [[146, 159], [197, 126], [202, 135], [151, 169]], "text": "Duane", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Duane", "recog_valid": true, "glyph_recog_text": "Suenr", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[254, 251], [259, 246], [261, 248], [302, 220], [306, 225], [258, 258]], "text": "contucts", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Contacts", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[365, 450], [372, 471], [399, 455], [385, 434]], "text": "0", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "0", "recog_valid": true, "glyph_recog_text": "0", "glyph_recog_ld": 1.0}, {"polygon": [[88, 136], [98, 151], [133, 129], [166, 107], [209, 76], [201, 60]], "text": "SAMSUNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SAMSUNG", "recog_valid": true, "glyph_recog_text": "SAMSUNG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198263.jpg", "caption": "a man on a bike is standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067200.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329351.jpg", "caption": "two photos of a hot dog in a box", "annotations": [{"polygon": [[193, 109], [282, 87], [295, 93], [296, 98], [294, 113], [277, 126], [267, 128], [196, 139]], "text": "JERRY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Jerry'", "recog_valid": false, "glyph_recog_text": "JERRY'S", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198289.jpg", "caption": "a woman in pink tennis outfit holding a racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067220.jpg", "caption": "a police officer on a motorcycle driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067222.jpg", "caption": "a bedroom with a bed, dresser, and chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067241.jpg", "caption": "a man kiteboarding in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460458.jpg", "caption": "a young man riding a skateboard in the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329387.jpg", "caption": "a large airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067246.jpg", "caption": "a blue plate with a picture of a bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198319.jpg", "caption": "a sign that says prepare to stop on the side of the road", "annotations": [{"polygon": [[120, 276], [118, 309], [167, 310], [168, 276]], "text": "40", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "40", "recog_valid": true, "glyph_recog_text": "40", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067248.jpg", "caption": "a woman looking at her cell phone", "annotations": [{"polygon": [[126, 65], [131, 138], [166, 64]], "text": "local", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "-。", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460470.jpg", "caption": "a double decker bus with a man on the top", "annotations": [{"polygon": [[134, 209], [209, 188], [210, 207], [134, 224]], "text": "THE LONDON TOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "mHlOOONTOR cok", "recog_valid": false, "glyph_recog_text": "TAB LONOXKNN TOAUR", "glyph_recog_ld": 0.16666712962937236}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460503.jpg", "caption": "a hot dog and a bottle of ketchup on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198363.jpg", "caption": "a man jumping to hit a tennis ball", "annotations": [{"polygon": [[143, 191], [138, 238], [330, 231], [332, 201]], "text": "Carour", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Jareebur", "recog_valid": false, "glyph_recog_text": "Carour", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[336, 243], [333, 290], [254, 290], [257, 244]], "text": "rket", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "rket", "recog_valid": true, "glyph_recog_text": "rket", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329462.jpg", "caption": "a street sign with a arrow pointing to the right", "annotations": [{"polygon": [[185, 245], [276, 243], [274, 275], [186, 277]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sna", "recog_valid": false, "glyph_recog_text": "BUS", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329469.jpg", "caption": "a black and white photo of two people on atvs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198405.jpg", "caption": "a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067339.jpg", "caption": "a pizza on a white plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198429.jpg", "caption": "a cake with a baby and teddy bear", "annotations": [{"polygon": [[220, 443], [319, 443], [309, 409], [249, 412], [254, 392], [225, 395]], "text": "Eva", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Eva", "recog_valid": true, "glyph_recog_text": "Eva", "glyph_recog_ld": 1.0}, {"polygon": [[117, 338], [296, 338], [295, 357], [320, 358], [327, 337], [344, 336], [344, 317], [343, 312], [264, 316], [268, 296], [210, 298], [204, 317], [112, 320], [111, 337]], "text": "sweet-things", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dling", "recog_valid": false, "glyph_recog_text": "sweet-things", "glyph_recog_ld": 0.25000062499947917}, {"polygon": [[338, 296], [412, 291], [413, 341], [343, 341]], "text": "08", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "08", "recog_valid": true, "glyph_recog_text": "08", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067407.jpg", "caption": "a carousel is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067414.jpg", "caption": "a car parked next to a zebra", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198506.jpg", "caption": "a group of people standing near a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198514.jpg", "caption": "st mary's church, london, england", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067446.jpg", "caption": "a group of people standing in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329616.jpg", "caption": "a sandwich and a side of beets on a pink plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198547.jpg", "caption": "a toy motor scooter with a british flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460694.jpg", "caption": "starbucks, new york city, usa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329623.jpg", "caption": "a table with food on it and a person standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067490.jpg", "caption": "a traffic light is on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329638.jpg", "caption": "two white birds are walking in the grass near the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329640.jpg", "caption": "a man is flying a kite in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460728.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198600.jpg", "caption": "a stop sign on a sidewalk next to a row of buildings", "annotations": [{"polygon": [[407, 140], [407, 183], [470, 193], [472, 154]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329684.jpg", "caption": "a large clock tower with a blue and red face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198611.jpg", "caption": "a desk with a laptop on it and a bunch of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460780.jpg", "caption": "a traffic light that is red", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067573.jpg", "caption": "four green apples on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329726.jpg", "caption": "a building with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198654.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329730.jpg", "caption": "an older man standing next to a wood fired oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460810.jpg", "caption": "a man on a bike rides past a chinese archway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329756.jpg", "caption": "a group of men in military uniforms cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067615.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460833.jpg", "caption": "a cutting board with carrots, celery, and other ingredients", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329765.jpg", "caption": "a bag of apples sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067623.jpg", "caption": "a view of the highway from inside a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329784.jpg", "caption": "a woman standing next to a motorcycle with a sign", "annotations": [{"polygon": [[116, 123], [127, 144], [141, 150], [153, 143], [161, 126], [160, 112], [154, 104], [141, 102], [125, 103], [118, 108]], "text": "SERU", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "E印", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[277, 241], [274, 245], [321, 272], [322, 269]], "text": "www.serv.org.uk", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "woirw.keryara ut", "recog_valid": false, "glyph_recog_text": "ihii...k.", "glyph_recog_ld": 0.12500054687465822}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460862.jpg", "caption": "a group of people sitting around a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460863.jpg", "caption": "a us air force plane parked in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067657.jpg", "caption": "a bottle of beer and a glass of wine on a table", "annotations": [{"polygon": [[454, 369], [452, 373], [503, 404], [503, 397]], "text": "DOCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "ROMNDOCK", "recog_valid": false, "glyph_recog_text": "::,.", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460880.jpg", "caption": "a sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198735.jpg", "caption": "four men standing on a tennis court holding tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460885.jpg", "caption": "two boys sitting on the floor with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198751.jpg", "caption": "a parking meter with a bicycle sticker on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460897.jpg", "caption": "a man riding a bike past a large white building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460910.jpg", "caption": "a man on a bike rides past the front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329847.jpg", "caption": "a woman bending down to pick up a tennis ball", "annotations": [{"polygon": [[100, 211], [100, 154], [105, 146], [110, 141], [115, 140], [178, 130], [181, 130], [183, 132], [220, 155], [217, 186], [209, 183], [202, 178], [194, 176], [181, 170], [165, 168], [147, 169], [122, 183], [111, 210]], "text": "perrier", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "perrier", "recog_valid": true, "glyph_recog_text": "perrier", "glyph_recog_ld": 1.0}, {"polygon": [[368, 216], [370, 213], [372, 211], [408, 204], [410, 204], [411, 205], [431, 217], [430, 236], [427, 233], [422, 230], [418, 229], [411, 226], [402, 225], [393, 225], [376, 245], [368, 246]], "text": "ERRIER", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "serrier", "recog_valid": false, "glyph_recog_text": "ERRIER", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[471, 248], [470, 218], [473, 213], [477, 211], [479, 210], [512, 205], [512, 226], [509, 225], [498, 226], [487, 230], [481, 234], [477, 248]], "text": "Perr", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "pe.", "recog_valid": false, "glyph_recog_text": "Perr", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067771.jpg", "caption": "a garbage truck driving down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461004.jpg", "caption": "two women and a man standing on a tennis court", "annotations": [{"polygon": [[452, 286], [450, 320], [513, 328], [512, 293]], "text": "LIKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LIKE", "recog_valid": true, "glyph_recog_text": "LIKE", "glyph_recog_ld": 1.0}, {"polygon": [[461, 336], [461, 363], [469, 367], [503, 371], [513, 372], [512, 332], [487, 329], [471, 325], [465, 328]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067798.jpg", "caption": "a tablet computer sitting on a counter next to a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461015.jpg", "caption": "a man is holding up a snowboard in the air", "annotations": [{"polygon": [[257, 86], [263, 95], [333, 41], [323, 31], [256, 85]], "text": "HEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CVEH", "recog_valid": false, "glyph_recog_text": "HEAD", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461019.jpg", "caption": "a man and a woman holding up a bank note", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198880.jpg", "caption": "two men playing tennis", "annotations": [{"polygon": [[196, 272], [209, 256], [226, 271], [214, 287]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329954.jpg", "caption": "a man holding a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329988.jpg", "caption": "a bathroom with a toilet, sink, and shower curtain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329989.jpg", "caption": "a car is parked in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198923.jpg", "caption": "a snowboarder in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330041.jpg", "caption": "a stop sign with a red light on it", "annotations": [{"polygon": [[193, 269], [193, 219], [308, 217], [306, 264]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067937.jpg", "caption": "the big storm is coming", "annotations": [{"polygon": [[127, 443], [126, 467], [152, 467], [161, 476], [174, 474], [174, 449], [154, 442]], "text": "Big", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Big", "recog_valid": true, "glyph_recog_text": "Big", "glyph_recog_ld": 1.0}, {"polygon": [[335, 443], [333, 466], [448, 474], [453, 470], [453, 449], [409, 442]], "text": "coming", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Coming", "recog_valid": false, "glyph_recog_text": "coming", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199011.jpg", "caption": "1927 ford model t for sale in michigan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330100.jpg", "caption": "a group of people on a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461181.jpg", "caption": "a green train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068003.jpg", "caption": "a man walks past a sign for the back rub for men", "annotations": [{"polygon": [[170, 48], [169, 77], [219, 79], [219, 50]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[231, 50], [232, 79], [296, 80], [296, 52]], "text": "BEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BEST", "recog_valid": true, "glyph_recog_text": "BEST", "glyph_recog_ld": 1.0}, {"polygon": [[170, 83], [170, 112], [235, 113], [234, 84]], "text": "BACK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BACK", "recog_valid": true, "glyph_recog_text": "BACK", "glyph_recog_ld": 1.0}, {"polygon": [[247, 84], [248, 113], [296, 115], [296, 86]], "text": "RUB", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "RUB", "recog_valid": true, "glyph_recog_text": "RUB", "glyph_recog_ld": 1.0}, {"polygon": [[326, 149], [328, 179], [398, 237], [416, 235], [403, 179]], "text": "RECORD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RCOngG", "recog_valid": false, "glyph_recog_text": "RECORD", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[319, 282], [315, 307], [382, 298], [371, 275]], "text": "ATM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ATM", "recog_valid": true, "glyph_recog_text": "ATM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068048.jpg", "caption": "a cat sitting next to a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199126.jpg", "caption": "a group of people in a small boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330208.jpg", "caption": "a man wearing a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199151.jpg", "caption": "two men sitting at a table with a laptop computer", "annotations": [{"polygon": [[239, 65], [246, 96], [337, 104], [333, 63]], "text": "WHOA!", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WHOA", "recog_valid": false, "glyph_recog_text": "WHOA!", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330229.jpg", "caption": "a street sign with arrows pointing in different directions", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199165.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330253.jpg", "caption": "a bus parked in a lot next to a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461350.jpg", "caption": "a group of stuffed animals sitting on a floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199212.jpg", "caption": "a birthday cake with a train on it", "annotations": [{"polygon": [[82, 275], [86, 307], [157, 354], [165, 331]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HHPPY", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[176, 340], [175, 371], [240, 417], [240, 382]], "text": "4th", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4ch", "recog_valid": false, "glyph_recog_text": "4 t h", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[279, 377], [268, 423], [391, 323], [400, 290]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "zietbdey", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[181, 203], [147, 256], [209, 261], [212, 208]], "text": "Alex", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "行", "recog_valid": false, "glyph_recog_text": "Alex", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199225.jpg", "caption": "a man is riding a skateboard on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199243.jpg", "caption": "a large airplane parked on the tarmac at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199244.jpg", "caption": "a vintage poster advertising the broadway limited", "annotations": [{"polygon": [[283, 473], [283, 512], [416, 513], [419, 485], [417, 472]], "text": "LIMITED", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LIMITEI", "recog_valid": false, "glyph_recog_text": "LIMITED", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[99, 473], [98, 512], [264, 512], [272, 473]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "RRDADWAY", "recog_valid": false, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461389.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068183.jpg", "caption": "a plate with a sandwich and fries on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068194.jpg", "caption": "a train with a man standing in the door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068231.jpg", "caption": "airplanes parked at a dock with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461450.jpg", "caption": "a blue airplane parked on the tarmac with parked cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330391.jpg", "caption": "a clock on a building", "annotations": [{"polygon": [[170, 53], [242, 80], [243, 110], [165, 82]], "text": "SK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SLK", "recog_valid": false, "glyph_recog_text": "SK", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461464.jpg", "caption": "a train station with a clock and a sign", "annotations": [{"polygon": [[44, 147], [30, 177], [211, 231], [214, 208]], "text": "PISTOIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PISTOIA", "recog_valid": true, "glyph_recog_text": "PISTOIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330395.jpg", "caption": "a man is standing next to a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199331.jpg", "caption": "two men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461494.jpg", "caption": "a yellow school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461501.jpg", "caption": "a person holding an umbrella with newspaper articles on it", "annotations": [{"polygon": [[235, 315], [243, 327], [295, 295], [288, 286]], "text": "ONION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONION", "recog_valid": true, "glyph_recog_text": "ONION", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461506.jpg", "caption": "a man standing on the side of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461509.jpg", "caption": "two young boys playing tennis on a tennis court", "annotations": [{"polygon": [[396, 109], [394, 148], [488, 149], [501, 140], [505, 106], [439, 105], [413, 106]], "text": "mw mark warner", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MJ", "recog_valid": false, "glyph_recog_text": "mw mark warmer", "glyph_recog_ld": 7.142852040953329e-07}, {"polygon": [[156, 108], [155, 148], [256, 149], [261, 107]], "text": "mw mark warner", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MJ", "recog_valid": false, "glyph_recog_text": "mw makwamer", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199363.jpg", "caption": "a birthday cake with candles and horses", "annotations": [{"polygon": [[156, 186], [149, 192], [155, 197], [156, 209], [153, 220], [146, 221], [144, 217], [139, 220], [149, 227], [156, 221], [172, 220], [191, 217], [194, 231], [256, 241], [246, 198], [172, 195], [170, 183]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hoppy", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[262, 185], [262, 215], [284, 222], [349, 217], [360, 237], [378, 242], [383, 233], [380, 222], [373, 200], [347, 201], [341, 185], [309, 188], [279, 181]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Btutldon", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[109, 253], [100, 268], [107, 294], [112, 297], [173, 289], [165, 267], [130, 269], [127, 250]], "text": "Bon", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bon", "recog_valid": true, "glyph_recog_text": "Bon", "glyph_recog_ld": 1.0}, {"polygon": [[219, 254], [219, 285], [282, 293], [326, 281], [312, 246]], "text": "chuck", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UhucK", "recog_valid": false, "glyph_recog_text": "chuck", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461517.jpg", "caption": "a group of men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199381.jpg", "caption": "a baseball player throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330481.jpg", "caption": "a man and a dog sleeping in a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199432.jpg", "caption": "a large airplane sitting on top of a runway", "annotations": [{"polygon": [[277, 484], [299, 501], [312, 401], [293, 386]], "text": "American", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "desiiauy", "recog_valid": false, "glyph_recog_text": "", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072821.jpg", "caption": "a plate with two pieces of banana on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334981.jpg", "caption": "the roof of a cathedral with many windows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466054.jpg", "caption": "a group of people standing around a luggage cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072850.jpg", "caption": "a baseball player holding a baseball", "annotations": [{"polygon": [[278, 155], [280, 155], [296, 189], [197, 303], [171, 251]], "text": "Braves", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8xaoow", "recog_valid": false, "glyph_recog_text": "Braves", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072861.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466083.jpg", "caption": "two baseball players are standing in a batting cage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466097.jpg", "caption": "a living room with a couch and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072895.jpg", "caption": "a klm airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072902.jpg", "caption": "a man is throwing a frisbee in a park", "annotations": [{"polygon": [[198, 35], [198, 62], [242, 66], [242, 39]], "text": "hastings", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "hastings", "recog_valid": true, "glyph_recog_text": "hastings", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466134.jpg", "caption": "a television set on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335063.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466136.jpg", "caption": "a desk with a computer, a keyboard, a mouse, a monitor, and a plant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335076.jpg", "caption": "two boys with painted faces and hats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072961.jpg", "caption": "two men sitting on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335109.jpg", "caption": "a tow truck driving down a street in london", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335111.jpg", "caption": "a woman standing on a sidewalk with a bus in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072969.jpg", "caption": "two men walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072978.jpg", "caption": "a street sign with a pole with a street name on it", "annotations": [{"polygon": [[235, 316], [235, 334], [396, 352], [396, 347], [391, 337]], "text": "FAIRGROUNDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FAIRGROUNDS", "recog_valid": true, "glyph_recog_text": "FAIRGROUNDS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072984.jpg", "caption": "a group of cyclists racing down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072993.jpg", "caption": "a steam train with smoke coming out of the engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073022.jpg", "caption": "a baseball player is at home plate waiting for the pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466243.jpg", "caption": "a young boy is brushing his teeth in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073029.jpg", "caption": "a b17 bomber flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335184.jpg", "caption": "a large elephant is walking down the street with people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466260.jpg", "caption": "a young man is riding a skateboard in the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335189.jpg", "caption": "business people sitting at a table writing on paper", "annotations": [{"polygon": [[274, 424], [274, 386], [505, 386], [506, 428]], "text": "WWW.Lifementalhealth", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "wwwfementalhealth.com", "recog_valid": false, "glyph_recog_text": "WWW.Lifementalhealth", "glyph_recog_ld": 0.5238097505667854}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204131.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073066.jpg", "caption": "a sign with a hand and a pair of scissors hanging on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466288.jpg", "caption": "three people standing next to a motorcycle in front of a mountain", "annotations": [{"polygon": [[398, 362], [409, 379], [446, 392], [483, 393], [486, 393], [498, 367], [489, 357], [418, 354], [405, 356]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "iPhabyany", "recog_valid": false, "glyph_recog_text": "photography", "glyph_recog_ld": 0.18181892561915847}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335216.jpg", "caption": "mountain top ski area, whistler, bc", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466301.jpg", "caption": "a british airways airplane parked on a wet runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335231.jpg", "caption": "an elephant is standing on its hind legs and reaching for a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466305.jpg", "caption": "a bed with white sheets and pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073094.jpg", "caption": "a kitchen with a stove, microwave and refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335255.jpg", "caption": "a large passenger jet flying through a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204203.jpg", "caption": "a man is looking at a wine glass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204217.jpg", "caption": "a car driving down a street with a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073174.jpg", "caption": "a woman in pink is holding a dog while getting her hair cut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073190.jpg", "caption": "two pictures of baseball players in different positions", "annotations": [{"polygon": [[472, 201], [473, 254], [510, 255], [513, 201]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "21", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073192.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335339.jpg", "caption": "a police officer is standing next to a police car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204279.jpg", "caption": "a woman walking down the street with a black cab", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466430.jpg", "caption": "broccoli florets for sale", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335359.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073215.jpg", "caption": "black and white photo of vintage cars in front of train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073226.jpg", "caption": "two laptops on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466445.jpg", "caption": "a man in a suit and tie standing in a bedroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466448.jpg", "caption": "a young boy sitting on a couch playing a video game", "annotations": [{"polygon": [[252, 323], [265, 314], [281, 303], [288, 298], [294, 312], [294, 319], [291, 323], [278, 335], [263, 340], [254, 343], [247, 336]], "text": "HAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZF", "recog_valid": false, "glyph_recog_text": "HAL", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073235.jpg", "caption": "a street with traffic lights and a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204321.jpg", "caption": "a white horse and a brown horse in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073262.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466505.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204380.jpg", "caption": "a stop sign on a street corner in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204384.jpg", "caption": "a young boy playing a video game in front of a fireplace", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466544.jpg", "caption": "a baseball player standing on a field with a umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466547.jpg", "caption": "a red bus driving down the street next to a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073339.jpg", "caption": "a person riding a bike on a street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073342.jpg", "caption": "a clock tower with a flagpole and a flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335486.jpg", "caption": "a train traveling down the tracks on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204417.jpg", "caption": "a building with a clock on the side of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073348.jpg", "caption": "a red and black dirt bike parked next to other motorcycles", "annotations": [{"polygon": [[276, 143], [256, 150], [320, 182], [331, 170], [275, 144]], "text": "KTM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7", "recog_valid": false, "glyph_recog_text": "KTM", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466580.jpg", "caption": "a desk with a computer, a chair, a bookcase and a crocheted blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335525.jpg", "caption": "a woman holding a cell phone and a bottle of ketchup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466621.jpg", "caption": "two trains are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335551.jpg", "caption": "a steam train pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073413.jpg", "caption": "two double decker buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204491.jpg", "caption": "a man doing a trick on a skateboard in a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204505.jpg", "caption": "1936 ford pickup cc-1207898 for sale in san diego, california", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073434.jpg", "caption": "a woman standing in a kitchen with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204514.jpg", "caption": "three hot dogs wrapped in plastic with labels on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073442.jpg", "caption": "a green and orange food truck parked on a city street", "annotations": [{"polygon": [[149, 219], [149, 214], [150, 215], [150, 207], [148, 206], [148, 200], [158, 199], [171, 199], [171, 203], [177, 203], [184, 203], [183, 206], [191, 206], [194, 206], [197, 205], [200, 205], [201, 206], [202, 208], [203, 212], [202, 215], [196, 216], [174, 218], [173, 221], [172, 228], [189, 230], [189, 233], [203, 234], [212, 235], [213, 238], [222, 238], [223, 238], [224, 236], [225, 235], [228, 234], [229, 236], [229, 237], [237, 238], [238, 240], [238, 248], [237, 250], [228, 249], [186, 249], [170, 248], [165, 235], [163, 235], [162, 233], [162, 225], [162, 223], [162, 222], [161, 219]], "text": "Mike N Wille's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Nio ", "recog_valid": false, "glyph_recog_text": "AMiae t wmeng", "glyph_recog_ld": 0.1538468047332271}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204529.jpg", "caption": "a group of wine glasses and bottles on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204556.jpg", "caption": "a parking meter with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466710.jpg", "caption": "a little girl standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466714.jpg", "caption": "a white oven with a door and a cabinet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466723.jpg", "caption": "a group of people posing for a photo with snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073511.jpg", "caption": "a tray with a clock, a bowl, and other items", "annotations": [{"polygon": [[207, 292], [207, 292], [239, 292], [236, 326], [202, 326]], "text": "03", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "03", "recog_valid": true, "glyph_recog_text": "0", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466730.jpg", "caption": "a group of motorcycles are lined up in a showroom", "annotations": [{"polygon": [[144, 111], [126, 154], [200, 164], [211, 123]], "text": "RM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RM", "recog_valid": true, "glyph_recog_text": "RM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335660.jpg", "caption": "two pictures of a woman playing tennis", "annotations": [{"polygon": [[141, 171], [145, 209], [207, 207], [206, 168]], "text": "Owls", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ouls", "recog_valid": false, "glyph_recog_text": "Owls", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[390, 170], [391, 210], [456, 204], [453, 171]], "text": "Owls", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ouls", "recog_valid": false, "glyph_recog_text": "Owls", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204603.jpg", "caption": "a couple of teddy bears hanging from a net", "annotations": [{"polygon": [[115, 281], [201, 204], [269, 219], [185, 292]], "text": "HSBS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "HSBS", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466750.jpg", "caption": "a white bowl with chicken, broccoli, and cucumber", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204606.jpg", "caption": "a man wearing a dinosaur mask brushing his teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073537.jpg", "caption": "two people holding up pictures of a man and woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335683.jpg", "caption": "japan airlines boeing 787-9", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466775.jpg", "caption": "a bus driving down a street with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073568.jpg", "caption": "a woman sitting at a table with a large platter of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335712.jpg", "caption": "a woman standing next to a giant teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335722.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204657.jpg", "caption": "a group of people standing on the shore with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204683.jpg", "caption": "a basket with fruit, muffins, and other items", "annotations": [{"polygon": [[449, 154], [468, 180], [459, 186], [438, 161]], "text": "tac", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "tac", "recog_valid": true, "glyph_recog_text": "tac", "glyph_recog_ld": 1.0}, {"polygon": [[428, 180], [428, 180], [434, 176], [434, 176], [434, 176], [458, 200], [458, 200], [454, 208]], "text": "grapefruit", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "9rapefruit", "recog_valid": false, "glyph_recog_text": "q4WR:", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[6, 232], [12, 243], [33, 229], [39, 232], [39, 232], [51, 218], [43, 199], [43, 199]], "text": "opel", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Ftapel", "recog_valid": false, "glyph_recog_text": "opel", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073620.jpg", "caption": "a group of people cutting a cake with an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073626.jpg", "caption": "a large crowd of people are gathered at a motorcycle show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466845.jpg", "caption": "a group of people standing around a fence with sheep", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335775.jpg", "caption": "a boat is floating on the water near a mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335787.jpg", "caption": "a pink billboard with a movie poster on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073650.jpg", "caption": "a train on a track with the sun behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335804.jpg", "caption": "a train is pulling into a station with a yellow and blue stripe", "annotations": [{"polygon": [[373, 182], [424, 184], [424, 216], [373, 218], [360, 210], [359, 204]], "text": "485 North East Express", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "485", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073661.jpg", "caption": "a red brick building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204735.jpg", "caption": "a woman laying on a surfboard", "annotations": [{"polygon": [[167, 272], [170, 301], [257, 296], [251, 270]], "text": "blue", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "blue", "recog_valid": true, "glyph_recog_text": "blue", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466885.jpg", "caption": "a large airplane parked on a runway at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073686.jpg", "caption": "a laptop, a backpack, a cell phone, a camera, a pen, a wallet, a cell phone charger, a pen, a notebook, a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073694.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204785.jpg", "caption": "a train car with a canadian flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335860.jpg", "caption": "a bunch of bananas with faces drawn on them", "annotations": [{"polygon": [[118, 314], [118, 314], [134, 310], [149, 354], [132, 360], [116, 314]], "text": "PUPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PUPS", "recog_valid": true, "glyph_recog_text": "PUPS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466942.jpg", "caption": "a large white bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466949.jpg", "caption": "a yellow school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466967.jpg", "caption": "a mouse is sitting next to a laptop computer", "annotations": [{"polygon": [[310, 365], [311, 409], [485, 400], [488, 353]], "text": "1of@kind", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "1cf@kind", "recog_valid": false, "glyph_recog_text": "1of@kind", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335913.jpg", "caption": "a bed with a black comforter and a book shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466992.jpg", "caption": "a stop sign and a sign that says reserved for boston street patrollers", "annotations": [{"polygon": [[47, 310], [42, 342], [45, 350], [52, 352], [113, 351], [129, 329], [131, 320], [127, 311]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073786.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204888.jpg", "caption": "a person on skis doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073824.jpg", "caption": "a woman cutting a cake with a knife", "annotations": [{"polygon": [[234, 341], [296, 335], [301, 402], [301, 401], [235, 401]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "@", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467081.jpg", "caption": "a person holding a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204938.jpg", "caption": "a large jet airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467091.jpg", "caption": "a bus driving down a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336021.jpg", "caption": "a red truck parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073885.jpg", "caption": "two women holding wine bottles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073892.jpg", "caption": "a black and white photo of an old airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467109.jpg", "caption": "a display case with a variety of donuts", "annotations": [{"polygon": [[22, 244], [19, 259], [64, 276], [67, 263]], "text": "Donuts", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Donuts", "recog_valid": true, "glyph_recog_text": "Donuls", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467126.jpg", "caption": "a street sign with a stop sign and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336061.jpg", "caption": "an air india airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336063.jpg", "caption": "a busy street with many people walking and driving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336077.jpg", "caption": "a baseball player swinging at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336078.jpg", "caption": "a group of people standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336081.jpg", "caption": "a small boat with a flag on it parked on the side of the road", "annotations": [{"polygon": [[114, 323], [232, 306], [235, 329], [115, 341]], "text": "ACHOMRAICH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ACHOMRAICH", "recog_valid": true, "glyph_recog_text": "ACHOMRAICH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205022.jpg", "caption": "a blue and yellow motorcycle parked in front of a gas station", "annotations": [{"polygon": [[144, 264], [147, 231], [270, 213], [269, 251]], "text": "movistar", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "movstar", "recog_valid": false, "glyph_recog_text": "movistar", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[218, 180], [221, 171], [269, 192], [266, 203]], "text": "CBR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CDK", "recog_valid": false, "glyph_recog_text": "CBR", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[120, 342], [126, 353], [180, 381], [168, 364]], "text": "HONDA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "钟口特承器", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[155, 327], [152, 333], [201, 388], [205, 384]], "text": "Black Prince", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RcRPaiC", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336107.jpg", "caption": "two men standing next to a plane on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205047.jpg", "caption": "people walking down a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073976.jpg", "caption": "an old black and white photo of a harbor with boats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467193.jpg", "caption": "three baseball players standing on a field", "annotations": [{"polygon": [[280, 78], [280, 110], [358, 103], [356, 75]], "text": "Cable", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Cable", "recog_valid": true, "glyph_recog_text": "Cable", "glyph_recog_ld": 1.0}, {"polygon": [[1, 65], [2, 107], [51, 103], [50, 63]], "text": "O", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "O", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336159.jpg", "caption": "a group of people flying kites in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205116.jpg", "caption": "a man getting his hair cut by another man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467262.jpg", "caption": "a green atm machine in a building", "annotations": [{"polygon": [[296, 232], [314, 262], [191, 391], [173, 355]], "text": "servihanca", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "servihaMa", "recog_valid": false, "glyph_recog_text": "servihanca", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336195.jpg", "caption": "a man and a baby sitting at a table with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205125.jpg", "caption": "a plate of food with a bowl of soup and a sandwich", "annotations": [{"polygon": [[199, 76], [209, 83], [179, 115], [170, 108]], "text": "PREMIUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PREMIIN", "recog_valid": false, "glyph_recog_text": "PPEMJM", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074055.jpg", "caption": "an old airplane is parked in a hangar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205152.jpg", "caption": "a vase with flowers and a statue of a hindu god", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336231.jpg", "caption": "a group of young women looking at a cell phone", "annotations": [{"polygon": [[210, 328], [216, 353], [273, 340], [268, 315]], "text": "FAST?", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FAST", "recog_valid": false, "glyph_recog_text": "FAST?", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074088.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074090.jpg", "caption": "a baseball player swinging at a pitch during a game", "annotations": [{"polygon": [[408, 328], [418, 358], [452, 352], [442, 322]], "text": "19", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "19", "recog_valid": true, "glyph_recog_text": "19", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074103.jpg", "caption": "a snowboarder in the air doing a trick", "annotations": [{"polygon": [[151, 266], [204, 247], [219, 294], [169, 310]], "text": "ME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ME", "recog_valid": true, "glyph_recog_text": "ME", "glyph_recog_ld": 1.0}, {"polygon": [[183, 359], [236, 340], [258, 390], [199, 404]], "text": "DS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOS", "recog_valid": false, "glyph_recog_text": "DS", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205181.jpg", "caption": "a blue bus on a bridge over a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467325.jpg", "caption": "a person with a hat and scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336289.jpg", "caption": "a man holding a snowboard", "annotations": [{"polygon": [[174, 164], [160, 286], [185, 288], [199, 174]], "text": "ATOMIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ATOMIC", "recog_valid": true, "glyph_recog_text": "4-02-", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336291.jpg", "caption": "a bunch of oranges", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205223.jpg", "caption": "four different pictures of a man in a suit and tie", "annotations": [{"polygon": [[339, 302], [404, 302], [408, 335], [340, 336]], "text": "KRISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KRISTA", "recog_valid": true, "glyph_recog_text": "KRISTA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467384.jpg", "caption": "a man on a tennis court", "annotations": [{"polygon": [[94, 134], [129, 135], [138, 124], [145, 135], [214, 135], [204, 159], [185, 159], [159, 168], [153, 158], [102, 157]], "text": "valspar", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "valspar", "recog_valid": true, "glyph_recog_text": "valspar", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336314.jpg", "caption": "a glass of wine next to a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074177.jpg", "caption": "two children laying on a large bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074179.jpg", "caption": "a pink truck and a yellow truck are parked on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074183.jpg", "caption": "a black and white photo of a baseball team", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336350.jpg", "caption": "a group of people standing in a field holding umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336369.jpg", "caption": "a person sitting at a desk with a book open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205299.jpg", "caption": "a group of people standing around a table with drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467461.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336390.jpg", "caption": "a person holding a nokia lumia phone in their hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205323.jpg", "caption": "a man is getting on a jet with a ladder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205362.jpg", "caption": "a traffic light that is leaning over the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205367.jpg", "caption": "two pictures of a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205384.jpg", "caption": "a man in a white shirt and blue shorts is about to hit a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205392.jpg", "caption": "a group of people riding bikes and cars on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467564.jpg", "caption": "a busy street with many people and vehicles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074354.jpg", "caption": "a man sitting on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467572.jpg", "caption": "a pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336503.jpg", "caption": "a group of people standing around a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205432.jpg", "caption": "a man standing at a train station waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205439.jpg", "caption": "two red fire trucks parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205461.jpg", "caption": "a fruit stand with a variety of fruits and vegetables", "annotations": [{"polygon": [[296, 245], [313, 246], [329, 292], [308, 291]], "text": "$12.00", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "00215", "recog_valid": false, "glyph_recog_text": "312.00", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336541.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[255, 140], [247, 178], [271, 182], [280, 142]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "U兑", "recog_valid": false, "glyph_recog_text": "92", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336552.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074415.jpg", "caption": "a street corner at night with a street sign and a person walking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467640.jpg", "caption": "a group of people on horses with a large can of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074429.jpg", "caption": "an old postcard of people at the dock with boats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467678.jpg", "caption": "people standing in front of a bus with a sign on it", "annotations": [{"polygon": [[342, 147], [335, 156], [332, 162], [330, 167], [327, 182], [315, 182], [301, 181], [308, 165], [313, 166], [320, 153], [333, 137]], "text": "Diue", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "naypo", "recog_valid": false, "glyph_recog_text": "enig", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[382, 147], [370, 153], [382, 173], [386, 181], [405, 170], [402, 163], [393, 167], [387, 156]], "text": "SeuL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "seul.", "recog_valid": false, "glyph_recog_text": "Se", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336621.jpg", "caption": "a bowl of oranges on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205564.jpg", "caption": "two young boys playing with a ball and a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336651.jpg", "caption": "a person typing on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467723.jpg", "caption": "a man holding a surfboard on a bus", "annotations": [{"polygon": [[33, 169], [34, 188], [77, 170], [73, 144]], "text": "RIP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "RIT", "recog_valid": false, "glyph_recog_text": "RIP", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[81, 134], [82, 156], [153, 117], [142, 94]], "text": "CURL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "CURL", "recog_valid": true, "glyph_recog_text": "CURL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074515.jpg", "caption": "a woman standing in front of a pile of luggage", "annotations": [{"polygon": [[224, 253], [234, 258], [243, 246], [258, 238], [272, 235], [284, 238], [292, 242], [298, 231], [289, 226], [273, 222], [261, 224], [252, 226], [239, 236], [231, 243]], "text": "UNIVERSITA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEINEIRSIT", "recog_valid": false, "glyph_recog_text": "LINIVERSITA", "glyph_recog_ld": 0.45454595041277235}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336660.jpg", "caption": "two remotes sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467745.jpg", "caption": "a young boy in a baseball uniform standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205604.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[121, 149], [115, 175], [114, 195], [140, 209], [153, 166]], "text": "21", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "lZ", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336690.jpg", "caption": "united airlines plane parked at the gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467762.jpg", "caption": "a cat is sitting in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205620.jpg", "caption": "a black and white photo of a baseball player pitching a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467763.jpg", "caption": "chop chop, london, england", "annotations": [{"polygon": [[134, 155], [141, 153], [146, 154], [147, 141], [153, 141], [153, 154], [159, 153], [196, 154], [204, 154], [210, 159], [211, 167], [208, 175], [202, 179], [196, 175], [196, 186], [190, 186], [190, 176], [136, 177], [129, 173], [127, 164], [127, 159]], "text": "chop", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "chop", "recog_valid": true, "glyph_recog_text": "chop", "glyph_recog_ld": 1.0}, {"polygon": [[219, 168], [221, 161], [225, 156], [234, 156], [239, 156], [239, 141], [243, 142], [243, 157], [250, 155], [273, 156], [286, 154], [289, 157], [296, 156], [301, 160], [301, 166], [301, 173], [298, 179], [290, 179], [289, 177], [288, 188], [283, 187], [282, 177], [228, 177], [224, 177], [221, 174]], "text": "chop", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "chop", "recog_valid": true, "glyph_recog_text": "chop", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467778.jpg", "caption": "a motorcycle is parked next to a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336720.jpg", "caption": "a white scooter parked on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205648.jpg", "caption": "a woman in a bikini is standing in front of a window with tennis balls", "annotations": [{"polygon": [[138, 357], [360, 356], [361, 411], [138, 409]], "text": "TENNIS?", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TENINISP", "recog_valid": false, "glyph_recog_text": "TENNIS?", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205656.jpg", "caption": "a bus driving down a street with a man standing on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336734.jpg", "caption": "a person on skis standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336770.jpg", "caption": "a train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205703.jpg", "caption": "a desk with a computer and a bookcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205707.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[392, 369], [510, 371], [492, 421], [383, 420]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Potynpie.", "recog_valid": false, "glyph_recog_text": "Photography", "glyph_recog_ld": 0.2727279338836964}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467858.jpg", "caption": "two women in wetsuits standing on the beach holding surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205724.jpg", "caption": "a fruit stand with lots of apples and oranges", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336797.jpg", "caption": "a blue and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467872.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205744.jpg", "caption": "a woman skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467929.jpg", "caption": "a man in a red and black jacket holding skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074739.jpg", "caption": "a pink banner is hanging over a brick archway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336897.jpg", "caption": "a man standing on a snow covered mountain", "annotations": [{"polygon": [[361, 155], [409, 174], [413, 159], [364, 143], [362, 155]], "text": "BJR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UHNE", "recog_valid": false, "glyph_recog_text": "BJR", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205845.jpg", "caption": "a group of men sitting on motorcycles at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468011.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[3, 303], [194, 306], [201, 384], [109, 382], [97, 415], [0, 412]], "text": "rgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "rgan", "recog_valid": true, "glyph_recog_text": "rgan", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336954.jpg", "caption": "a display case with a variety of pizzas", "annotations": [{"polygon": [[233, 12], [263, 29], [265, 35], [262, 44], [231, 28], [231, 17]], "text": "Thank", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Thank", "recog_valid": true, "glyph_recog_text": "Thank", "glyph_recog_ld": 1.0}, {"polygon": [[243, -1], [293, 28], [304, -2]], "text": "greatly", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "50", "recog_valid": false, "glyph_recog_text": "greatly", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205887.jpg", "caption": "a parking meter with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336966.jpg", "caption": "a group of airplanes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468083.jpg", "caption": "a man walking on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337018.jpg", "caption": "an old black and white photo of horses pulling a carriage down a snow covered street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468095.jpg", "caption": "a food processor sitting on a counter next to a bowl of flour", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205963.jpg", "caption": "a boy sitting on a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205981.jpg", "caption": "a snowboarder doing a trick on a ramp", "annotations": [{"polygon": [[313, 355], [313, 378], [375, 390], [376, 362]], "text": "TIGNES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TIGNES", "recog_valid": true, "glyph_recog_text": "TIGNES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337062.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[219, 114], [246, 100], [249, 126], [228, 133], [217, 128]], "text": "50 t", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "60", "recog_valid": false, "glyph_recog_text": "50t", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[210, 124], [208, 156], [297, 210], [297, 184]], "text": "Stanford", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Stanford", "recog_valid": true, "glyph_recog_text": "Stanford", "glyph_recog_ld": 1.0}, {"polygon": [[168, 236], [319, 254], [327, 317], [162, 302]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[209, 324], [210, 365], [288, 372], [290, 329]], "text": "OPO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORO", "recog_valid": false, "glyph_recog_text": "OPO", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337087.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468159.jpg", "caption": "a street with a traffic light and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074947.jpg", "caption": "a red and white hat on a mannequin head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468163.jpg", "caption": "a man in a suit holding a red ukulete", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337104.jpg", "caption": "a horse drawn carriage on a brick road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074964.jpg", "caption": "a table with a plate of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206062.jpg", "caption": "a man riding a wave on a yellow surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074997.jpg", "caption": "a black and white photo of a train on a train track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337146.jpg", "caption": "a table topped with a variety of donuts and other snacks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468219.jpg", "caption": "groom getting ready for his wedding", "annotations": [{"polygon": [[180, 403], [180, 403], [338, 405], [328, 433], [321, 431], [290, 430], [252, 432], [178, 423]], "text": "photograph", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "hotcgiaphy", "recog_valid": false, "glyph_recog_text": "photograph", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337153.jpg", "caption": "a bottle of hair dye sitting on a counter next to a mirror", "annotations": [{"polygon": [[99, 274], [99, 301], [179, 297], [178, 271]], "text": "Preference", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Preference", "recog_valid": true, "glyph_recog_text": "Preference", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206082.jpg", "caption": "a kitchen with a box in the middle of the room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468263.jpg", "caption": "a police officer on a horse and a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468284.jpg", "caption": "a little girl holding a tennis racket", "annotations": [{"polygon": [[267, 471], [295, 408], [221, 377], [189, 437]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206155.jpg", "caption": "an old black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468305.jpg", "caption": "a bus driving down a street with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468313.jpg", "caption": "a woman and child sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468318.jpg", "caption": "a car is stopped at a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075120.jpg", "caption": "a man and a woman standing in front of a brick building", "annotations": [{"polygon": [[328, 173], [344, 178], [369, 185], [394, 196], [387, 212], [383, 214], [371, 211], [360, 204], [349, 200], [333, 194]], "text": "TOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OUJN", "recog_valid": false, "glyph_recog_text": "TOWN", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[245, 170], [275, 211], [289, 201], [322, 189], [325, 171], [318, 170], [258, 165], [248, 167]], "text": "TOON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "出", "recog_valid": false, "glyph_recog_text": "TOON", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[257, 123], [289, 164], [322, 165], [394, 194], [403, 189], [407, 180], [403, 141], [387, 122], [358, 117], [308, 107], [273, 118]], "text": "TOON TOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30色", "recog_valid": false, "glyph_recog_text": "TOON TOWN", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075122.jpg", "caption": "two people walking down a street at night with umbrellas", "annotations": [{"polygon": [[356, 92], [357, 116], [358, 117], [416, 97], [413, 86], [381, 87]], "text": "FLAMIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FAMINOV", "recog_valid": false, "glyph_recog_text": "FLAMIN", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337265.jpg", "caption": "a person holding a sandwich with a toy in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206198.jpg", "caption": "a young boy swinging a bat at a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337325.jpg", "caption": "a nokia phone is displayed in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206255.jpg", "caption": "a cat sitting on a yellow blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206256.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468420.jpg", "caption": "a girl on a skateboard doing a trick on cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337353.jpg", "caption": "a large airplane parked on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468442.jpg", "caption": "sheep grazing in a field near a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468443.jpg", "caption": "thomas the train cake", "annotations": [{"polygon": [[299, 212], [301, 212], [369, 216], [382, 135], [305, 131], [299, 210], [299, 212]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468444.jpg", "caption": "a large jet airplane taking off from the runway", "annotations": [{"polygon": [[128, 242], [193, 249], [188, 284], [130, 276], [123, 249]], "text": "Ted", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ted", "recog_valid": true, "glyph_recog_text": "Ted", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337384.jpg", "caption": "a man sitting on a motorcycle in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337394.jpg", "caption": "a giraffe statue in front of a sign that says no parking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468469.jpg", "caption": "a bus stop with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075256.jpg", "caption": "a street sign with many different signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075266.jpg", "caption": "a street sign with a clock and a no turn on it", "annotations": [{"polygon": [[429, 287], [430, 316], [455, 320], [461, 317], [463, 311], [463, 281], [460, 281], [447, 290]], "text": "e", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ej", "recog_valid": false, "glyph_recog_text": "e", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[287, 278], [284, 306], [295, 309], [301, 310], [406, 311], [414, 294], [414, 288], [369, 279]], "text": "GALLER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Galler", "recog_valid": false, "glyph_recog_text": "GALLER", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[319, 332], [319, 360], [352, 364], [389, 357], [429, 357], [429, 326], [375, 326], [361, 334]], "text": "cykel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cykel", "recog_valid": true, "glyph_recog_text": "cykel", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075267.jpg", "caption": "a delta airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206341.jpg", "caption": "a pile of signs", "annotations": [{"polygon": [[112, 287], [86, 325], [92, 330], [120, 286]], "text": "FFIC ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CWNHILL", "recog_valid": false, "glyph_recog_text": "不子长格限", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075270.jpg", "caption": "a man is making a cake", "annotations": [{"polygon": [[10, 463], [10, 492], [22, 494], [45, 494], [50, 486], [50, 472], [29, 462]], "text": "To", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "To", "recog_valid": true, "glyph_recog_text": "To", "glyph_recog_ld": 1.0}, {"polygon": [[63, 462], [62, 503], [158, 493], [158, 477], [144, 462]], "text": "finish", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "finish", "recog_valid": true, "glyph_recog_text": "finish", "glyph_recog_ld": 1.0}, {"polygon": [[173, 472], [172, 493], [262, 493], [261, 472], [255, 465], [207, 462]], "text": "what", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "what", "recog_valid": true, "glyph_recog_text": "what", "glyph_recog_ld": 1.0}, {"polygon": [[295, 466], [294, 494], [418, 493], [416, 462]], "text": "started", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "started", "recog_valid": true, "glyph_recog_text": "started", "glyph_recog_ld": 1.0}, {"polygon": [[211, 19], [207, 74], [491, 71], [499, 60], [494, 34], [453, 20]], "text": "Desserts", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Desserts", "recog_valid": true, "glyph_recog_text": "Desserts", "glyph_recog_ld": 1.0}, {"polygon": [[257, 72], [254, 100], [256, 136], [293, 138], [484, 136], [492, 128], [489, 71], [450, 71]], "text": "SWEETS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SWEETS", "recog_valid": true, "glyph_recog_text": "SWEETS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337415.jpg", "caption": "a yellow and black train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337422.jpg", "caption": "a mug is in a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468503.jpg", "caption": "an ambulance is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206363.jpg", "caption": "a group of airplanes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075299.jpg", "caption": "two men are loading a truck with boxes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468519.jpg", "caption": "a man sitting on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337448.jpg", "caption": "a desk with two computers and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468529.jpg", "caption": "a store with lots of glass and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206394.jpg", "caption": "a boat with a trailer attached to it is parked in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468542.jpg", "caption": "a man is riding a horse through a field of cows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468538.jpg", "caption": "a toothbrush and toothpaste in a cup", "annotations": [{"polygon": [[295, 184], [338, 181], [314, 330], [279, 330]], "text": "colgate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "olgate", "recog_valid": false, "glyph_recog_text": "00-0", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206405.jpg", "caption": "a stack of books on a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206416.jpg", "caption": "a clock tower in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206454.jpg", "caption": "a man holding a glass of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337547.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337546.jpg", "caption": "a kitchen with stainless steel appliances and white cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206475.jpg", "caption": "a sandwich and fries in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337552.jpg", "caption": "a man and a woman walking down the street with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337555.jpg", "caption": "a display of red donuts on a wooden table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206486.jpg", "caption": "a man and woman walking in the ocean holding a surfboard", "annotations": [{"polygon": [[403, 177], [403, 177], [414, 166], [426, 177], [439, 176], [439, 180], [431, 181], [450, 195], [453, 192], [455, 194], [453, 196], [454, 199], [453, 201], [464, 210], [458, 217], [447, 209], [446, 212], [446, 212], [454, 225], [455, 222], [457, 222], [458, 226], [456, 230], [454, 230], [451, 225], [444, 215], [438, 210], [430, 204], [422, 202], [415, 200], [407, 199], [411, 196], [412, 193], [417, 192]], "text": "Wavestorms", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "neuihee", "recog_valid": false, "glyph_recog_text": "Wavestorns", "glyph_recog_ld": 0.1000008999991}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206501.jpg", "caption": "a person standing on a skateboard with a sign on it", "annotations": [{"polygon": [[250, 181], [240, 297], [265, 297], [277, 184]], "text": "DESTROY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5O21SEC", "recog_valid": false, "glyph_recog_text": "awoFr", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206504.jpg", "caption": "a large airplane in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468652.jpg", "caption": "a pickup truck with its doors open in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337585.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075450.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "annotations": [{"polygon": [[369, 50], [446, 59], [446, 81], [441, 82], [438, 80], [389, 74], [369, 69], [367, 68]], "text": "usopen.org", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "usopen.org", "recog_valid": true, "glyph_recog_text": "usopen.org", "glyph_recog_ld": 1.0}, {"polygon": [[369, 145], [409, 150], [408, 167], [411, 183], [367, 177]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EM", "recog_valid": false, "glyph_recog_text": "IBM", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075493.jpg", "caption": "a street sign and a sign for a restaurant", "annotations": [{"polygon": [[295, 150], [295, 150], [341, 157], [344, 164], [345, 180], [296, 173]], "text": "Trust", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Trust", "recog_valid": true, "glyph_recog_text": "Trust", "glyph_recog_ld": 1.0}, {"polygon": [[299, 206], [350, 194], [357, 198], [360, 216], [320, 228], [298, 229], [297, 219]], "text": "Corporate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Corporate", "recog_valid": true, "glyph_recog_text": "Corporate", "glyph_recog_ld": 1.0}, {"polygon": [[75, 261], [60, 272], [59, 293], [66, 299], [76, 297], [87, 290], [132, 289], [139, 282], [140, 276], [136, 268], [136, 268], [132, 264], [119, 271], [108, 272], [91, 271], [89, 262]], "text": "Cane's", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Canes", "recog_valid": false, "glyph_recog_text": "Cane's", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337648.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[235, 254], [235, 254], [217, 282], [260, 279], [293, 273], [291, 258], [284, 248], [272, 246], [262, 249]], "text": "Athlets", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AtBA", "recog_valid": false, "glyph_recog_text": "Athlets", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337675.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206606.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206628.jpg", "caption": "two women in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075565.jpg", "caption": "a traffic sign is on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075585.jpg", "caption": "an orange double decker bus parked next to other buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468818.jpg", "caption": "a man is holding a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468826.jpg", "caption": "a group of people sitting at a table eating food", "annotations": [{"polygon": [[133, 399], [95, 435], [102, 446], [144, 408]], "text": "MOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "EOO", "recog_valid": false, "glyph_recog_text": "MQs", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337760.jpg", "caption": "an old photo of a fire truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075621.jpg", "caption": "a group of people standing under a canopy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206697.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075630.jpg", "caption": "an old fashioned steam engine train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337779.jpg", "caption": "a person holding up a cell phone with a picture of a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075637.jpg", "caption": "a woman holding a frisbee in her hand", "annotations": [{"polygon": [[194, 355], [187, 364], [203, 375], [233, 389], [257, 394], [259, 378], [233, 373], [219, 367]], "text": "STRALIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STRALIA", "recog_valid": true, "glyph_recog_text": "STRALIA", "glyph_recog_ld": 1.0}, {"polygon": [[208, 331], [269, 356], [262, 376], [195, 357]], "text": "D.ORG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O.ORG", "recog_valid": false, "glyph_recog_text": "D.ORG", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337780.jpg", "caption": "a man riding an elephant down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075643.jpg", "caption": "a man in a suit and tie sitting at a table with other people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337796.jpg", "caption": "a double decker bus", "annotations": [{"polygon": [[486, 100], [486, 100], [476, 126], [471, 128], [472, 131], [511, 130], [510, 125], [507, 124], [505, 124], [495, 99]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337802.jpg", "caption": "a black and white photo of a bird perched on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337806.jpg", "caption": "a red train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206735.jpg", "caption": "a group of people standing in a field with tents", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075682.jpg", "caption": "a woman sitting at a table using a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468912.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337842.jpg", "caption": "a baseball player standing on a field with a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337875.jpg", "caption": "a train traveling down the tracks near some rocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075740.jpg", "caption": "a man in a black shirt and white shorts is about to throw a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468966.jpg", "caption": "a man sitting on a bed with a pizza and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206830.jpg", "caption": "two black bears walking across a fallen log in a stream", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468977.jpg", "caption": "a tow truck parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206841.jpg", "caption": "a birthday cake with candles on it", "annotations": [{"polygon": [[361, 268], [382, 285], [400, 281], [427, 278], [435, 273], [445, 259], [453, 253], [454, 249], [445, 237], [443, 235], [425, 233], [409, 232], [422, 242], [405, 250], [400, 259], [364, 261]], "text": "happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "happy", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337930.jpg", "caption": "a man standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075800.jpg", "caption": "three people are riding horses in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469026.jpg", "caption": "a blue car driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206889.jpg", "caption": "a plane on the runway", "annotations": [{"polygon": [[122, 221], [152, 190], [218, 195], [226, 201], [208, 227], [201, 229]], "text": "SAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAS", "recog_valid": true, "glyph_recog_text": "SAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206890.jpg", "caption": "a young boy playing with toys in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206913.jpg", "caption": "a bicycle and a horse in a field", "annotations": [{"polygon": [[365, 360], [390, 396], [396, 392], [371, 354], [367, 356]], "text": "SURLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SURLY", "recog_valid": true, "glyph_recog_text": "S品产LY", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075846.jpg", "caption": "a car with stuffed animals on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469092.jpg", "caption": "a pink castle cake with a princess on top", "annotations": [{"polygon": [[99, 362], [111, 375], [136, 387], [143, 372], [115, 350], [99, 362]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "tappy", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338030.jpg", "caption": "a laptop sitting on a desk in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469105.jpg", "caption": "a black and white photo of people waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206968.jpg", "caption": "a man is holding a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469124.jpg", "caption": "a person walking across a snowy street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075925.jpg", "caption": "people walking on a wet sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206998.jpg", "caption": "a display of scissors and other items on a shelf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207013.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075968.jpg", "caption": "a woman holding a tennis racket", "annotations": [{"polygon": [[156, 25], [156, 25], [426, 21], [428, 112], [156, 114]], "text": "PARI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PARII", "recog_valid": false, "glyph_recog_text": "PARI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338118.jpg", "caption": "a clock on a cart with a horse and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207065.jpg", "caption": "two giraffes walking in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076004.jpg", "caption": "a pink bear in a parade with people walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076009.jpg", "caption": "airport terminal with parked cars and an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207083.jpg", "caption": "a woman sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338173.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338197.jpg", "caption": "a man in a green shirt is playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469274.jpg", "caption": "a man in a green shirt is throwing a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469315.jpg", "caption": "a train pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207223.jpg", "caption": "a basketball game in an arena with orange and white uniforms", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076159.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338317.jpg", "caption": "a woman walking down the sidewalk on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207250.jpg", "caption": "a group of teddy bears dressed in red uniforms", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469405.jpg", "caption": "a laptop computer sitting on a couch with a remote control", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469410.jpg", "caption": "a bench sitting in a field with fog in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207276.jpg", "caption": "a man with luggage and a cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338370.jpg", "caption": "a clock with the words sewing central oregon on it", "annotations": [{"polygon": [[164, 308], [175, 322], [188, 333], [204, 344], [214, 348], [214, 355], [208, 367], [191, 360], [177, 351], [163, 339], [152, 326], [147, 318], [149, 312], [158, 306], [162, 306]], "text": "SERVING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SERVING", "recog_valid": true, "glyph_recog_text": "SERVING", "glyph_recog_ld": 1.0}, {"polygon": [[316, 354], [322, 362], [329, 362], [345, 351], [361, 337], [371, 325], [380, 311], [361, 303], [345, 323], [334, 333], [323, 341], [315, 345], [313, 350]], "text": "OREGON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OREGON", "recog_valid": true, "glyph_recog_text": "ORECON", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207317.jpg", "caption": "two women sitting on a bench next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338407.jpg", "caption": "a man is leaning on a motorcycle next to a gas tank", "annotations": [{"polygon": [[232, 359], [235, 366], [240, 370], [246, 378], [257, 387], [263, 395], [284, 409], [293, 406], [286, 395], [273, 388], [265, 385], [260, 380], [251, 373], [246, 364], [241, 353], [236, 355]], "text": "Corse GP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CorseGP", "recog_valid": false, "glyph_recog_text": "Corse GP", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[217, 395], [257, 431], [255, 425], [252, 417], [221, 388]], "text": "PREFA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PREFA", "recog_valid": true, "glyph_recog_text": "vngea", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[481, 437], [513, 464], [511, 454], [486, 433]], "text": "PRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "…", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469495.jpg", "caption": "people sitting on chairs", "annotations": [{"polygon": [[482, 84], [484, 94], [452, 106], [425, 117], [421, 107]], "text": "CREATIVITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "CREATIVITY", "recog_valid": true, "glyph_recog_text": "SREATINTY", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[426, 118], [432, 134], [478, 117], [511, 105], [512, 98], [512, 89], [507, 87], [469, 101], [446, 110], [430, 116]], "text": "DEMO", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "DEMOC", "recog_valid": false, "glyph_recog_text": "DEMO", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[458, 198], [463, 209], [512, 193], [512, 179]], "text": "CAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ENERG", "recog_valid": false, "glyph_recog_text": "CAN", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[448, 171], [451, 180], [512, 158], [512, 148]], "text": "LOW PRICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "INVISIBILIT", "recog_valid": false, "glyph_recog_text": "中中国", "glyph_recog_ld": 9.090900826569381e-07}, {"polygon": [[433, 135], [440, 150], [512, 126], [511, 110], [511, 107]], "text": "ECOLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "ECOLO", "recog_valid": true, "glyph_recog_text": "ECOLO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207378.jpg", "caption": "a man on a bicycle rides past a school bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469545.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[371, 290], [388, 308], [411, 294], [393, 273], [379, 284]], "text": "57", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "57", "recog_valid": true, "glyph_recog_text": "57", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469556.jpg", "caption": "a street sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338497.jpg", "caption": "a train is crossing a wooden bridge in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338509.jpg", "caption": "a refrigerator door with magnets on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338521.jpg", "caption": "a large sign with a clock tower in the background", "annotations": [{"polygon": [[9, 142], [304, 278], [304, 300], [7, 162], [11, 143]], "text": "UNDERGROUND", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "UNDERGROUND", "recog_valid": true, "glyph_recog_text": "UNDERGROUND", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338528.jpg", "caption": "a clock is on top of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207467.jpg", "caption": "a group of people brushing their teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469614.jpg", "caption": "a teddy bear and a wooden mannequin", "annotations": [{"polygon": [[265, 301], [263, 320], [385, 334], [393, 319]], "text": "HAPPY BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AAPPY BIRTHDAY", "recog_valid": false, "glyph_recog_text": "HAPPY BIRTHDAY", "glyph_recog_ld": 0.9285714795918003}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207475.jpg", "caption": "a man sitting at a table with a gun and a bunch of keys", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338549.jpg", "caption": "a man on a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076414.jpg", "caption": "two men in life jackets standing on the side of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207521.jpg", "caption": "a woman laying in a hospital bed talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076476.jpg", "caption": "a delta airplane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338651.jpg", "caption": "a baby in a high chair eating a cake", "annotations": [{"polygon": [[284, 239], [296, 225], [327, 246], [311, 261]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANAY", "recog_valid": false, "glyph_recog_text": "当定果的以!", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338670.jpg", "caption": "a fruit and vegetable market with lots of bananas and other fruits", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207620.jpg", "caption": "a clock on the wall above a vase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338693.jpg", "caption": "a street corner with a traffic light and a sign that says america of fish", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207622.jpg", "caption": "a stop sign with a street sign on it", "annotations": [{"polygon": [[283, 328], [390, 331], [390, 308], [403, 307], [412, 299], [412, 290], [410, 276], [400, 271], [285, 267], [277, 274], [272, 281], [271, 297], [271, 317], [276, 323]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076571.jpg", "caption": "a street sign with a traffic light and a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469798.jpg", "caption": "korean air boeing 747-400", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338732.jpg", "caption": "two dogs laying on a bed with luggage and clothes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469816.jpg", "caption": "a traffic light and a stop sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338745.jpg", "caption": "a double decker bus is driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076607.jpg", "caption": "a woman laying on a surfboard with a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469825.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207698.jpg", "caption": "a man riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076652.jpg", "caption": "a box with forks and spoons inside", "annotations": [{"polygon": [[313, 419], [314, 449], [396, 450], [395, 418]], "text": "Stub", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Stub", "recog_valid": true, "glyph_recog_text": "Stub", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469888.jpg", "caption": "photo of the day - joseph watson, miami marlins", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338833.jpg", "caption": "people stand on the side of the road at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338838.jpg", "caption": "a young boy on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469913.jpg", "caption": "a skier in a blue and green suit is racing down a slope", "annotations": [{"polygon": [[237, 220], [238, 220], [243, 219], [264, 246], [267, 245], [271, 250], [269, 251], [272, 254], [268, 257], [268, 257], [268, 257], [261, 251], [252, 240], [241, 226]], "text": "Slivenija", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Slovenile", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469914.jpg", "caption": "a man holding two stuffed animals in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207782.jpg", "caption": "two girls standing in the rain holding umbrellas", "annotations": [{"polygon": [[214, 104], [277, 136], [275, 157], [223, 134]], "text": "TRIK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRK", "recog_valid": false, "glyph_recog_text": "TRIK", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338863.jpg", "caption": "a skateboarder doing a trick on a ledge", "annotations": [{"polygon": [[428, 425], [429, 390], [511, 389], [510, 425]], "text": "marpla Skate", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "marpla kcate", "recog_valid": false, "glyph_recog_text": "marpla Skate", "glyph_recog_ld": 0.8333334722221064}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469941.jpg", "caption": "two men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207808.jpg", "caption": "a train sitting in the desert with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207828.jpg", "caption": "a street corner with a traffic light and tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469973.jpg", "caption": "a black and white photo of a young boy on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076784.jpg", "caption": "a skateboarder doing a trick in front of a monument", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207857.jpg", "caption": "a cat's paw resting on a laptop keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076792.jpg", "caption": "an emirates airplane flying through the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207875.jpg", "caption": "an emirates airplane on the runway at an airport", "annotations": [{"polygon": [[207, 210], [462, 205], [466, 256], [210, 258]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Emirates", "recog_valid": true, "glyph_recog_text": "Emirates", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470028.jpg", "caption": "a woman with long hair standing on the steps of a red tram", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338988.jpg", "caption": "a bunch of vegetables and a bag of cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207932.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076877.jpg", "caption": "a sandwich and a can of coke on a plate", "annotations": [{"polygon": [[49, 220], [56, 230], [89, 210], [81, 200], [72, 206], [69, 203], [56, 208], [60, 215]], "text": "mullar", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "mUllor", "recog_valid": false, "glyph_recog_text": "muller", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[59, 238], [65, 249], [150, 199], [141, 182], [59, 235]], "text": "corner", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "coroer", "recog_valid": false, "glyph_recog_text": "corner", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470097.jpg", "caption": "a baseball player throwing a pitch on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339042.jpg", "caption": "a table with a bowl of soup, bread and a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207980.jpg", "caption": "a cake decorated with baseball themed decorations", "annotations": [{"polygon": [[157, 509], [229, 500], [225, 476], [200, 470], [121, 476], [119, 498]], "text": "Cupcakes", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Gwpeakes", "recog_valid": false, "glyph_recog_text": "Cupcakes", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339054.jpg", "caption": "a chalk drawing of a man", "annotations": [{"polygon": [[65, 308], [75, 293], [112, 277], [127, 280], [151, 288], [170, 311], [153, 322], [116, 320], [76, 317]], "text": "edic", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "edic", "recog_valid": true, "glyph_recog_text": "edic", "glyph_recog_ld": 1.0}, {"polygon": [[202, 296], [226, 283], [226, 283], [258, 297], [258, 297], [255, 333], [241, 336], [241, 336], [181, 322]], "text": "ctg", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "s d9", "recog_valid": false, "glyph_recog_text": "ctg", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[98, 321], [157, 339], [154, 370], [98, 364]], "text": "NEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NEW", "recog_valid": true, "glyph_recog_text": "NEW", "glyph_recog_ld": 1.0}, {"polygon": [[158, 333], [223, 338], [249, 350], [238, 381], [199, 376], [167, 366]], "text": "WEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WFST", "recog_valid": false, "glyph_recog_text": "WEST", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339074.jpg", "caption": "a black and white photo of a street with horse drawn carriages", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339089.jpg", "caption": "a close up of two parking meters", "annotations": [{"polygon": [[162, 152], [180, 141], [192, 134], [206, 130], [217, 125], [237, 121], [252, 121], [252, 133], [239, 135], [212, 142], [193, 151], [176, 162], [170, 167]], "text": "METERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "METERS", "recog_valid": true, "glyph_recog_text": "METERS", "glyph_recog_ld": 1.0}, {"polygon": [[237, 213], [234, 235], [304, 239], [307, 208], [237, 208]], "text": "01 : 13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0113", "recog_valid": false, "glyph_recog_text": "01:13", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[259, 122], [258, 132], [286, 135], [302, 137], [315, 143], [333, 151], [366, 175], [375, 166], [362, 154], [331, 137], [300, 126], [275, 121]], "text": "ENFORCED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENFORCED", "recog_valid": true, "glyph_recog_text": "ENFORCED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339094.jpg", "caption": "several airplanes are parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208043.jpg", "caption": "a green and red grandfather clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470190.jpg", "caption": "a motorcycle parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208055.jpg", "caption": "a group of people standing around a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076988.jpg", "caption": "a box of scissors sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208067.jpg", "caption": "a street with a stop sign and a car on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208075.jpg", "caption": "two men are sitting on a bench", "annotations": [{"polygon": [[98, 413], [100, 443], [217, 436], [216, 406]], "text": "evan", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ev an", "recog_valid": false, "glyph_recog_text": "evan", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208088.jpg", "caption": "a train on the tracks at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470243.jpg", "caption": "a man is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339186.jpg", "caption": "a stop sign covered in snow", "annotations": [{"polygon": [[106, 283], [104, 322], [148, 328], [179, 324], [192, 322], [203, 317], [220, 306], [219, 283]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339192.jpg", "caption": "a street sign with a picture of a man and woman running", "annotations": [{"polygon": [[211, 360], [211, 396], [288, 394], [286, 360]], "text": "100", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "100", "recog_valid": true, "glyph_recog_text": "100", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208133.jpg", "caption": "a cake with pink and white decorations on top", "annotations": [{"polygon": [[119, 166], [229, 111], [259, 118], [161, 169]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TERX", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[185, 160], [265, 121], [293, 125], [230, 175]], "text": "60th", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CU", "recog_valid": false, "glyph_recog_text": "60th", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470305.jpg", "caption": "a woman sitting on a wall with many kites hanging from it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339240.jpg", "caption": "a young woman is looking at her cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208172.jpg", "caption": "a man and a woman are looking at their cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339248.jpg", "caption": "a man holding a beer and talking on a cell phone", "annotations": [{"polygon": [[124, 391], [128, 394], [129, 399], [126, 402], [74, 419], [70, 430], [67, 430], [64, 427], [64, 427], [63, 423], [45, 428], [35, 417], [34, 411], [39, 407], [52, 404], [59, 401]], "text": "Vagitarian", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ofagilawian", "recog_valid": false, "glyph_recog_text": "Vagitariar", "glyph_recog_ld": 0.5454549586773103}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077121.jpg", "caption": "a man in a blue jacket", "annotations": [{"polygon": [[305, 255], [300, 280], [345, 287], [355, 263]], "text": "shell", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sselff", "recog_valid": false, "glyph_recog_text": "shell", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077160.jpg", "caption": "a small car with a tent on top of it", "annotations": [{"polygon": [[81, 152], [117, 160], [119, 167], [118, 186], [79, 176]], "text": "MIGUEL'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MIGUELS", "recog_valid": false, "glyph_recog_text": "MIGtELS", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[219, 300], [227, 323], [306, 314], [309, 310], [310, 305], [309, 296], [304, 291]], "text": "Verts", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VeRTS", "recog_valid": false, "glyph_recog_text": "Verts", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339307.jpg", "caption": "a woman in orange shorts and a white tank top holding a tennis racket", "annotations": [{"polygon": [[153, 143], [155, 182], [191, 184], [274, 183], [274, 148], [216, 140]], "text": "Emi", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Emi&", "recog_valid": false, "glyph_recog_text": "Emi", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[280, 144], [278, 184], [367, 187], [367, 146]], "text": "Airline", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Air ne", "recog_valid": false, "glyph_recog_text": "Airline", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208240.jpg", "caption": "a man on a skateboard doing a trick on a rail", "annotations": [{"polygon": [[243, 442], [240, 444], [235, 448], [238, 451], [240, 452], [249, 454], [253, 458], [259, 460], [265, 463], [271, 465], [275, 468], [281, 470], [284, 472], [288, 473], [295, 465], [288, 461], [280, 457], [268, 453], [258, 448]], "text": "SHATEBOARDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SHAEBOARDS", "recog_valid": false, "glyph_recog_text": "EHROSBOANEGG", "glyph_recog_ld": 0.33333388888842586}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208244.jpg", "caption": "a stop sign is seen through a window", "annotations": [{"polygon": [[219, 213], [286, 207], [286, 243], [220, 249]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208252.jpg", "caption": "a surfer in the air on a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470398.jpg", "caption": "a delta airplane parked at the gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470414.jpg", "caption": "a kitchen with a stove, oven, refrigerator and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339358.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470442.jpg", "caption": "a group of men working on a pile of snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470454.jpg", "caption": "a bus driving down a street with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470463.jpg", "caption": "a tall clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470467.jpg", "caption": "two women in traditional clothing performing a dance", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470472.jpg", "caption": "a group of people standing on a boat looking at something", "annotations": [{"polygon": [[82, 390], [86, 411], [113, 394], [118, 385], [107, 375]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "", "recog_valid": false, "glyph_recog_text": "E", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208329.jpg", "caption": "a black and white photo of a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339403.jpg", "caption": "a stop sign with a blue arrow and a red arrow", "annotations": [{"polygon": [[172, 154], [320, 123], [326, 150], [306, 204], [171, 220]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STAP", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470501.jpg", "caption": "a man and a girl are practicing hitting a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339446.jpg", "caption": "a red bicycle is parked next to a stop sign", "annotations": [{"polygon": [[301, 198], [289, 227], [331, 229], [331, 220], [323, 200]], "text": "AL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AL", "recog_valid": true, "glyph_recog_text": "AL", "glyph_recog_ld": 1.0}, {"polygon": [[224, 104], [224, 104], [276, 104], [280, 112], [277, 122], [270, 124], [269, 138], [224, 140], [218, 131], [219, 110], [221, 107]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[178, 7], [162, 48], [223, 46], [223, 36], [205, 6]], "text": "AL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AL", "recog_valid": true, "glyph_recog_text": "AL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077304.jpg", "caption": "a bench sitting on the edge of a rocky cliff overlooking the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208379.jpg", "caption": "a man and a child playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470532.jpg", "caption": "a woman with an umbrella walks in the rain", "annotations": [{"polygon": [[381, 90], [371, 108], [377, 114], [392, 112], [416, 107], [448, 100], [453, 91], [448, 83], [445, 79], [396, 88]], "text": "Altman's", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Altmane", "recog_valid": false, "glyph_recog_text": "Altman's", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208396.jpg", "caption": "a man and woman sitting on a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339489.jpg", "caption": "a man and a boy in a park holding a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077346.jpg", "caption": "a hedgehog clock in a box on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208433.jpg", "caption": "a fishing boat is out in the ocean near the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077375.jpg", "caption": "a skateboard is sitting on the floor of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077376.jpg", "caption": "a birthday cake with candles and sprinkles on it", "annotations": [{"polygon": [[119, 166], [147, 172], [179, 164], [244, 163], [261, 163], [276, 176], [263, 205], [229, 198], [203, 205], [167, 200], [158, 204], [126, 211]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hePPE", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[313, 165], [344, 160], [392, 163], [409, 163], [427, 170], [449, 178], [469, 179], [507, 201], [510, 211], [484, 255], [460, 242], [465, 222], [452, 220], [399, 204], [389, 197], [371, 194], [337, 194], [308, 195]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ex", "recog_valid": false, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077378.jpg", "caption": "a blue and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077383.jpg", "caption": "a red and yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339535.jpg", "caption": "two black birds sitting on top of dry grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470618.jpg", "caption": "a busy city street with many cars and buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077417.jpg", "caption": "a cat laying on a desk next to two computer monitors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208511.jpg", "caption": "a baseball game is being played in the outfield", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470672.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[110, 282], [112, 336], [264, 311], [264, 253]], "text": "LOCUST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LOCUST", "recog_valid": true, "glyph_recog_text": "LOCUST", "glyph_recog_ld": 1.0}, {"polygon": [[297, 246], [300, 304], [438, 284], [434, 221]], "text": "DRIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DRIVE", "recog_valid": true, "glyph_recog_text": "DRIVE", "glyph_recog_ld": 1.0}, {"polygon": [[287, 344], [283, 390], [353, 404], [356, 361]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}, {"polygon": [[271, 342], [267, 385], [123, 356], [125, 341], [209, 327]], "text": "HIGHBURY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "mCHBURY", "recog_valid": false, "glyph_recog_text": "HIGHBURY", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470710.jpg", "caption": "a street with many buildings and signs in asian writing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077504.jpg", "caption": "a large airplane with people boarding the stairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339670.jpg", "caption": "a group of people standing around a large pile of luggage", "annotations": [{"polygon": [[120, 300], [209, 305], [210, 314], [172, 330], [118, 326]], "text": "OGI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OGI", "recog_valid": true, "glyph_recog_text": "OGI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339687.jpg", "caption": "a man and woman sitting on a bus", "annotations": [{"polygon": [[306, 93], [305, 122], [366, 135], [368, 109]], "text": "4181", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "181A", "recog_valid": false, "glyph_recog_text": "4181", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[415, 121], [413, 146], [459, 154], [460, 129]], "text": "4181", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "4131", "recog_valid": false, "glyph_recog_text": "4181", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470762.jpg", "caption": "a fire hydrant with a red cap on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208621.jpg", "caption": "a snowboarder is in the air", "annotations": [{"polygon": [[115, 177], [149, 172], [172, 240], [205, 341], [172, 352]], "text": "NITRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NNTRO", "recog_valid": false, "glyph_recog_text": "NITRO", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077553.jpg", "caption": "a woman in a white shirt is standing on an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077557.jpg", "caption": "a plate with broccoli, grapes, and pasta on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470774.jpg", "caption": "a living room with a couch, a television, and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470797.jpg", "caption": "a c-17 transport aircraft parked on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077602.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470819.jpg", "caption": "a baseball player is standing on a mound", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470839.jpg", "caption": "a sign on a building has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208712.jpg", "caption": "a fruit and vegetable stand with many different types of fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470862.jpg", "caption": "a cat laying on top of a black bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339790.jpg", "caption": "a street with a red brick building and a car parked in front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339798.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[261, 132], [303, 123], [305, 158], [268, 161]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": true, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208727.jpg", "caption": "a bench sitting by a lake with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208773.jpg", "caption": "a surfboard on the beach with people in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208820.jpg", "caption": "a group of women in aprons holding food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339913.jpg", "caption": "two men standing next to a bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339916.jpg", "caption": "a white tote bag with a laptop and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470995.jpg", "caption": "a man standing next to an elephant in a circus", "annotations": [{"polygon": [[351, 32], [354, 84], [402, 79], [396, 29]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "R", "recog_valid": true, "glyph_recog_text": "R", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077785.jpg", "caption": "a plate with a piece of pizza on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339938.jpg", "caption": "a man sitting at a desk with a laptop and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077806.jpg", "caption": "a dog is sitting in a cage on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339966.jpg", "caption": "a group of teddy bears sitting on a dresser", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208898.jpg", "caption": "a rusty train car sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208907.jpg", "caption": "the inside of an old train car with a chair inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208910.jpg", "caption": "a dog sitting on the steps of an old building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339987.jpg", "caption": "a cat laying in a wicker chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471085.jpg", "caption": "a bathroom with a urinal and toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208954.jpg", "caption": "a large jetliner flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208963.jpg", "caption": "a man holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471139.jpg", "caption": "a computer keyboard and mouse on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077928.jpg", "caption": "a man is petting a horse in a stable", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340081.jpg", "caption": "a man in a green shirt carrying a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340082.jpg", "caption": "a white bus parked on a street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340095.jpg", "caption": "a black and white photo of a tennis team", "annotations": [{"polygon": [[333, 242], [337, 286], [349, 286], [369, 242]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "卜", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340102.jpg", "caption": "a woman in a green apron is preparing food in a kitchen", "annotations": [{"polygon": [[500, 398], [506, 393], [487, 380], [450, 356], [440, 359], [465, 375], [469, 382], [481, 387]], "text": "BOGGLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "2OOE", "recog_valid": false, "glyph_recog_text": "BOS银LE", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[441, 368], [441, 375], [478, 398], [480, 392], [463, 381], [458, 376], [454, 375]], "text": "BOGGLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BOGAE", "recog_valid": false, "glyph_recog_text": "GGGWsN", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471188.jpg", "caption": "a bus and a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077973.jpg", "caption": "a painting of a street with a man walking down the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340128.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340129.jpg", "caption": "a man in a red shirt and baseball cap is standing next to a man in a blue shirt and baseball cap", "annotations": [{"polygon": [[17, 333], [2, 320], [12, 304], [32, 294], [56, 293], [75, 300], [94, 310], [108, 324], [92, 336], [74, 323], [53, 316], [35, 318], [25, 325]], "text": "DEBRANO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EBRANO", "recog_valid": false, "glyph_recog_text": "DEBRANO", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[24, 331], [28, 330], [81, 330], [82, 345], [83, 375], [77, 379], [32, 375]], "text": "00", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "00", "recog_valid": true, "glyph_recog_text": "00", "glyph_recog_ld": 1.0}, {"polygon": [[365, 233], [353, 224], [346, 222], [342, 213], [337, 212], [334, 244], [349, 249], [373, 250], [373, 243]], "text": "Pilles", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ino.", "recog_valid": false, "glyph_recog_text": "Pitles", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340152.jpg", "caption": "a blue ice cream truck with the words gazoozles on it", "annotations": [{"polygon": [[121, 259], [126, 289], [133, 297], [175, 287], [219, 274], [272, 260], [293, 255], [288, 232], [255, 225], [183, 243], [182, 240]], "text": "Gadzooks!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gadzooks", "recog_valid": false, "glyph_recog_text": "Gadzooks!", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078009.jpg", "caption": "two cakes in boxes", "annotations": [{"polygon": [[115, 307], [188, 287], [192, 300], [116, 320]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRTHOAY", "recog_valid": false, "glyph_recog_text": "Bizthday", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[105, 323], [171, 303], [177, 330], [111, 354]], "text": "HAPPY BIRTHDA GOPAL..", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gepa", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[370, 312], [437, 294], [444, 307], [377, 327]], "text": "HAPPY BIRTHDAY, GOPAL.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BIRTHORY", "recog_valid": false, "glyph_recog_text": "wefi19pirn6ate", "glyph_recog_ld": 7.142852040953329e-07}, {"polygon": [[380, 322], [386, 344], [444, 322], [441, 313]], "text": "GOPAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GOOPAL", "recog_valid": false, "glyph_recog_text": "GOPAL", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209084.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340157.jpg", "caption": "a black and white photo of cows in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209092.jpg", "caption": "a wall full of clocks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471245.jpg", "caption": "a plate of food on a table with a glass of orange juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471256.jpg", "caption": "a man wearing a blue shirt with the words cultivate peace play frisbee", "annotations": [{"polygon": [[233, 223], [231, 246], [275, 252], [314, 257], [345, 256], [378, 257], [411, 254], [417, 231]], "text": "CULTIVATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CULTIVATE", "recog_valid": true, "glyph_recog_text": "CULTIVATE", "glyph_recog_ld": 1.0}, {"polygon": [[232, 250], [232, 282], [372, 295], [407, 291], [412, 258]], "text": "PEACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PEACE", "recog_valid": true, "glyph_recog_text": "PEACE", "glyph_recog_ld": 1.0}, {"polygon": [[234, 304], [404, 316], [401, 350], [236, 342]], "text": "PLAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PLAY", "recog_valid": true, "glyph_recog_text": "PLAY", "glyph_recog_ld": 1.0}, {"polygon": [[236, 341], [234, 368], [287, 378], [367, 380], [401, 379], [402, 352]], "text": "FRISBEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRISBEE", "recog_valid": true, "glyph_recog_text": "FRISBEE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209126.jpg", "caption": "a woman playing a video game with a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078058.jpg", "caption": "a man dressed as a wolf is standing next to a fire hydrant", "annotations": [{"polygon": [[335, 386], [359, 384], [379, 381], [396, 373], [393, 391], [381, 397], [362, 401], [340, 403], [331, 401]], "text": "WILFRED", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WILFREI", "recog_valid": false, "glyph_recog_text": "WILFRED", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[332, 428], [363, 428], [378, 424], [394, 416], [394, 452], [364, 466], [331, 466]], "text": "FX", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FX", "recog_valid": true, "glyph_recog_text": "FX", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471280.jpg", "caption": "two orange trains on tracks near each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078087.jpg", "caption": "a white bus parked in front of a building", "annotations": [{"polygon": [[393, 173], [420, 167], [422, 193], [391, 197]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "12", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340244.jpg", "caption": "a couple of motorcycles parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209176.jpg", "caption": "a red car driving down a street with a sign that says 30 mph", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340258.jpg", "caption": "an air canada airplane flying through the sky", "annotations": [{"polygon": [[314, 210], [315, 215], [392, 184], [392, 179]], "text": "CANADA ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "nahscl", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340259.jpg", "caption": "four bottles of red wine sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340267.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471339.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209200.jpg", "caption": "a woman is standing next to a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340282.jpg", "caption": "a woman holding a sign that says gulf sale", "annotations": [{"polygon": [[202, 156], [202, 190], [258, 181], [259, 150]], "text": "GULF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GULF", "recog_valid": true, "glyph_recog_text": "GULF", "glyph_recog_ld": 1.0}, {"polygon": [[202, 190], [200, 224], [256, 215], [259, 185]], "text": "SALE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SALE", "recog_valid": true, "glyph_recog_text": "SALE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340283.jpg", "caption": "a clock on a pole in a shopping mall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471357.jpg", "caption": "a train is parked in the snow with a snowman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340292.jpg", "caption": "a family sitting at a table with pizza and beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471376.jpg", "caption": "a sand sculpture of a person with a frisbee", "annotations": [{"polygon": [[36, 284], [56, 281], [67, 290], [84, 312], [119, 318], [75, 368], [47, 342], [34, 342], [25, 329]], "text": "By", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BY", "recog_valid": false, "glyph_recog_text": "B y", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[42, 366], [176, 373], [181, 410], [182, 412], [147, 418], [127, 422], [42, 410]], "text": "MIKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "IMLKE", "recog_valid": false, "glyph_recog_text": "MIKE", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340317.jpg", "caption": "a young boy and girl standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340331.jpg", "caption": "two young boys holding surfboards on the beach", "annotations": [{"polygon": [[76, 308], [122, 354], [133, 321], [88, 280]], "text": "Hsn", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "Hsn", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[180, 367], [241, 294], [259, 309], [199, 384]], "text": "Sank", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1园nos", "recog_valid": false, "glyph_recog_text": "Sank", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[380, 420], [370, 390], [260, 426], [327, 428]], "text": "Kona", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AUPUOY", "recog_valid": false, "glyph_recog_text": "Kona", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[227, 174], [226, 209], [238, 214], [256, 195], [255, 180]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "<", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471405.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471480.jpg", "caption": "two pictures of a man walking down the street with a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209345.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[277, 327], [398, 315], [397, 295], [273, 303]], "text": "SKATEBOARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SKATEBOARD", "recog_valid": true, "glyph_recog_text": "SKATEBOARD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209353.jpg", "caption": "two girls playing soccer on a field with a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078307.jpg", "caption": "a row of motorcycles parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340459.jpg", "caption": "a group of women playing frisbee in a field", "annotations": [{"polygon": [[312, 309], [351, 306], [351, 306], [353, 335], [313, 336]], "text": "00", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "00", "recog_valid": true, "glyph_recog_text": "00", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471535.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078322.jpg", "caption": "a group of baseball players posing for a photo", "annotations": [{"polygon": [[47, 207], [39, 216], [41, 224], [44, 234], [52, 231], [52, 227], [71, 236], [75, 237], [80, 231], [89, 227], [90, 224], [84, 221], [83, 214], [78, 216], [78, 218], [52, 207]], "text": "grizzlys", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Greyglo", "recog_valid": false, "glyph_recog_text": "grizzlys", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209394.jpg", "caption": "a motorcycle is on display at a motorcycle show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209405.jpg", "caption": "a car parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209418.jpg", "caption": "a large airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340494.jpg", "caption": "two children sitting on the floor with a comb", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209449.jpg", "caption": "a basket of onions and broccoli", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340523.jpg", "caption": "a street with a lot of signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340532.jpg", "caption": "a man walking down the street with a backpack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078394.jpg", "caption": "a laptop computer and a cell phone sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340544.jpg", "caption": "a train is pulling into a station with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078412.jpg", "caption": "a group of people standing on a sidewalk near a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078413.jpg", "caption": "a red sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471629.jpg", "caption": "a clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209503.jpg", "caption": "two boys playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209513.jpg", "caption": "a man jumping to hit a tennis ball", "annotations": [{"polygon": [[385, 213], [409, 199], [424, 218], [398, 234]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "6", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471669.jpg", "caption": "a city street with a crosswalk and a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209537.jpg", "caption": "two men sitting at a table with plates of food", "annotations": [{"polygon": [[61, 359], [80, 341], [96, 329], [102, 315], [110, 309], [122, 305], [128, 305], [134, 307], [136, 329], [129, 329], [117, 339], [100, 349], [91, 364], [83, 374], [71, 382]], "text": "WI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WIAOS", "recog_valid": false, "glyph_recog_text": "W I", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471690.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471711.jpg", "caption": "a bedroom with a bed, clock and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340647.jpg", "caption": "a traffic light on a city street with a car in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209603.jpg", "caption": "a table with a couple of pizzas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078553.jpg", "caption": "a group of people waiting for a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209638.jpg", "caption": "a tray with a sandwich and a drink on it", "annotations": [{"polygon": [[152, 271], [139, 273], [126, 170], [149, 159], [164, 255]], "text": "Diet Coke", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PPU", "recog_valid": false, "glyph_recog_text": ".", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471783.jpg", "caption": "a man in a kayak riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209643.jpg", "caption": "a man playing a guitar on a street corner", "annotations": [{"polygon": [[12, 112], [61, 111], [60, 152], [8, 151], [7, 112]], "text": "BLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BLE", "recog_valid": true, "glyph_recog_text": "BLE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078572.jpg", "caption": "a truck and a traffic light on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471790.jpg", "caption": "a kitchen with a stove, sink and oven in a van", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340727.jpg", "caption": "a woman sitting at a table with a pen and a bottle of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209671.jpg", "caption": "a white truck with a red and green bumper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471822.jpg", "caption": "a building with a clock on the front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471824.jpg", "caption": "a man in a hat and glasses drinking a red bull", "annotations": [{"polygon": [[308, 265], [303, 267], [294, 276], [287, 289], [285, 294], [275, 288], [278, 280], [284, 272], [289, 265], [297, 260]], "text": "Red Bull", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Red Bun", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078609.jpg", "caption": "a man on skis racing another man on skis", "annotations": [{"polygon": [[162, 201], [162, 201], [164, 247], [258, 258], [262, 206]], "text": "BMW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BMW", "recog_valid": true, "glyph_recog_text": "BMW", "glyph_recog_ld": 1.0}, {"polygon": [[308, 262], [424, 280], [430, 272], [431, 248], [418, 234], [395, 234], [370, 232], [365, 214], [355, 214], [358, 230], [305, 224]], "text": "Drive", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Srive", "recog_valid": false, "glyph_recog_text": "Drive", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[460, 232], [470, 241], [475, 232], [481, 227], [492, 228], [504, 237], [508, 245], [511, 231], [504, 220], [490, 214], [481, 214], [462, 223]], "text": "BMW", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "BMW", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[32, 192], [34, 231], [74, 234], [73, 193]], "text": "XDY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "xDr", "recog_valid": false, "glyph_recog_text": "¥", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471825.jpg", "caption": "a woman in a yellow dress playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340758.jpg", "caption": "a semi truck with a flag painted on it", "annotations": [{"polygon": [[427, 375], [326, 436], [338, 448], [439, 449], [495, 382]], "text": "BU", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "CP", "recog_valid": false, "glyph_recog_text": "BU", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209701.jpg", "caption": "a birthday cake with a baseball uniform, bat, and glove", "annotations": [{"polygon": [[307, 342], [400, 340], [400, 313], [366, 275], [315, 273], [307, 292]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}, {"polygon": [[86, 179], [180, 137], [247, 121], [331, 121], [396, 136], [404, 82], [304, 66], [205, 73], [92, 112], [77, 150]], "text": "jihwan", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "J16969", "recog_valid": false, "glyph_recog_text": "jihwan", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[124, 75], [179, 55], [180, 70], [133, 86]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HLPRY", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471860.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209718.jpg", "caption": "a vintage postcard of a kitten eating from a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209746.jpg", "caption": "a stop sign on a wooden post", "annotations": [{"polygon": [[71, 230], [75, 183], [189, 195], [187, 237]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471900.jpg", "caption": "a man and woman standing next to a car with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078689.jpg", "caption": "a laptop computer and a dog on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078695.jpg", "caption": "a plate with a banana, an orange, an apple, and an onion", "annotations": [{"polygon": [[370, 148], [390, 165], [402, 168], [419, 171], [437, 171], [452, 170], [455, 174], [453, 180], [434, 187], [427, 187], [404, 179], [393, 177], [381, 172], [369, 164], [365, 157], [366, 151], [368, 149]], "text": "Orange", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Orange", "recog_valid": true, "glyph_recog_text": "Orange", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209768.jpg", "caption": "an old photo of a group of men standing next to a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078716.jpg", "caption": "a parking meter with a coin slot and a coin slot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209813.jpg", "caption": "a boat sits on the sand near the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471966.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471967.jpg", "caption": "a person using a cell phone and a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471987.jpg", "caption": "a motorcycle racer is going down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078772.jpg", "caption": "a stack of books on top of a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471995.jpg", "caption": "a skateboarder is riding a ramp at a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209859.jpg", "caption": "a group of trains parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472012.jpg", "caption": "a man on a skateboard doing a trick on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078825.jpg", "caption": "a large airplane parked on a runway", "annotations": [{"polygon": [[305, 248], [305, 248], [331, 261], [330, 267], [320, 280], [286, 262]], "text": "Alitalia", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "litalia", "recog_valid": false, "glyph_recog_text": "Alitalia", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209906.jpg", "caption": "a view of a street with a traffic light and a car driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340988.jpg", "caption": "a dog is eating out of a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472065.jpg", "caption": "a row of trucks parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472089.jpg", "caption": "a yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078880.jpg", "caption": "a pile of suitcases", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341027.jpg", "caption": "a woman talking on a cell phone on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472103.jpg", "caption": "a bus and a truck are driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209969.jpg", "caption": "a street sign with many signs on it", "annotations": [{"polygon": [[157, 36], [152, 62], [215, 108], [221, 84]], "text": "UNLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UNLEY", "recog_valid": true, "glyph_recog_text": "UNLEY", "glyph_recog_ld": 1.0}, {"polygon": [[229, 92], [227, 120], [268, 149], [269, 124]], "text": "HIGH", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HIGH", "recog_valid": true, "glyph_recog_text": "HIGH", "glyph_recog_ld": 1.0}, {"polygon": [[277, 130], [276, 157], [336, 205], [337, 182]], "text": "SCHOOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SCHOOL", "recog_valid": true, "glyph_recog_text": "SCHOOL", "glyph_recog_ld": 1.0}, {"polygon": [[194, 242], [197, 219], [247, 228], [246, 249]], "text": "JOSEPH'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JOSEPH'S", "recog_valid": true, "glyph_recog_text": "JOSEPHS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[184, 347], [182, 375], [235, 377], [234, 349]], "text": "SPORTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPORTS", "recog_valid": true, "glyph_recog_text": "SPORTS", "glyph_recog_ld": 1.0}, {"polygon": [[124, 345], [122, 372], [173, 375], [175, 347]], "text": "INDOOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "INDOOR", "recog_valid": true, "glyph_recog_text": "INDOOR", "glyph_recog_ld": 1.0}, {"polygon": [[232, 154], [237, 176], [281, 184], [285, 165]], "text": "TUTT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TUTT", "recog_valid": true, "glyph_recog_text": "TUTT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341052.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[268, 181], [283, 179], [294, 179], [317, 192], [328, 204], [331, 221], [322, 246], [316, 234], [311, 225], [294, 221], [288, 218], [264, 221]], "text": "LOWELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EW", "recog_valid": false, "glyph_recog_text": "LOWELL.", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[250, 224], [298, 245], [323, 261], [316, 303], [309, 336], [255, 316], [209, 296]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "26", "recog_valid": false, "glyph_recog_text": "25", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472133.jpg", "caption": "a banana and a box of green pills", "annotations": [{"polygon": [[239, 275], [230, 285], [268, 306], [277, 295]], "text": "HP5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HP5", "recog_valid": true, "glyph_recog_text": "HP5", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341062.jpg", "caption": "a street corner with a traffic light and people walking", "annotations": [{"polygon": [[183, 149], [309, 125], [311, 148], [182, 173]], "text": "Fulton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "Fuiton", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[186, 125], [195, 132], [195, 128], [198, 129], [198, 134], [202, 136], [202, 133], [204, 133], [204, 138], [216, 146], [217, 153], [216, 158], [212, 158], [210, 160], [204, 157], [205, 153], [184, 141], [184, 124]], "text": "Bridge", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bndge", "recog_valid": false, "glyph_recog_text": "Bridge", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[91, 199], [91, 235], [130, 230], [133, 193]], "text": "JAZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "A7", "recog_valid": false, "glyph_recog_text": "JAZ", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[1, 214], [0, 245], [84, 235], [86, 199]], "text": "JIMMY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "mM", "recog_valid": false, "glyph_recog_text": "JIMMY", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210010.jpg", "caption": "a woman cutting a cake with a man in uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210024.jpg", "caption": "a virgin airlines plane flying in the sky", "annotations": [{"polygon": [[77, 271], [104, 253], [126, 278], [90, 308]], "text": "Virqin", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Jogin", "recog_valid": false, "glyph_recog_text": "Virqir", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210057.jpg", "caption": "a train traveling down the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078986.jpg", "caption": "a tennis match in a stadium with a large crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210063.jpg", "caption": "a man holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472209.jpg", "caption": "a young boy holding a baseball bat in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472226.jpg", "caption": "a microwave with a green light on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472258.jpg", "caption": "a birthday cake shaped like a thomas the train engine", "annotations": [{"polygon": [[186, 219], [186, 219], [177, 182], [195, 180], [213, 215], [212, 221]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "<", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[222, 179], [222, 179], [239, 161], [226, 145], [200, 165]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "al", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341189.jpg", "caption": "a man on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341206.jpg", "caption": "a person standing on a tennis court with a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341226.jpg", "caption": "a person's feet on a rug with a cell phone", "annotations": [{"polygon": [[0, 382], [112, 382], [113, 425], [0, 425]], "text": "BYMAHS", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "BYnnahs", "recog_valid": false, "glyph_recog_text": "BYMAHS", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472309.jpg", "caption": "a girl with red hair holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341241.jpg", "caption": "a group of people on horses in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341245.jpg", "caption": "two trains are traveling down the tracks with smoke coming out of the engines", "annotations": [{"polygon": [[119, 249], [121, 291], [161, 291], [157, 252]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": true, "glyph_recog_text": "6", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079106.jpg", "caption": "a sign that says open house on the side of a street", "annotations": [{"polygon": [[307, 129], [320, 163], [353, 152], [372, 152], [371, 141], [332, 131]], "text": "YVISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Vi5", "recog_valid": false, "glyph_recog_text": "YVISTA", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341271.jpg", "caption": "two slices of pizza", "annotations": [{"polygon": [[427, 150], [422, 182], [438, 187], [452, 188], [476, 189], [488, 190], [493, 189], [493, 167], [491, 171], [489, 167], [483, 167], [479, 167], [478, 158], [468, 159], [466, 159], [466, 169], [456, 166], [448, 165], [447, 157]], "text": "Coke", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cok", "recog_valid": false, "glyph_recog_text": "Coke", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079152.jpg", "caption": "an older man in a jacket and tie talking to a reporter", "annotations": [{"polygon": [[279, 396], [350, 341], [360, 364], [287, 418]], "text": "ESPN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ESrn", "recog_valid": false, "glyph_recog_text": "ESPN", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472394.jpg", "caption": "a skateboarder in the air performing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210249.jpg", "caption": "a woman in a white shirt and blue shorts is walking down a hallway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472400.jpg", "caption": "a train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079191.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[226, 144], [226, 144], [301, 145], [305, 158], [289, 182], [221, 180], [215, 172], [219, 148]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472439.jpg", "caption": "a baseball player is throwing a ball to a batter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341369.jpg", "caption": "a man in overalls is standing in front of a train", "annotations": [{"polygon": [[174, 79], [207, 78], [208, 109], [174, 111]], "text": "060", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "360", "recog_valid": false, "glyph_recog_text": "060", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341385.jpg", "caption": "a motorcycle and a box are parked next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079260.jpg", "caption": "a group of men in green shirts posing for a photo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079269.jpg", "caption": "a display case with various food items and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472495.jpg", "caption": "a teddy bear, flowers and candles are placed on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079305.jpg", "caption": "a man is kiteboarding in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341459.jpg", "caption": "a baseball player is getting ready to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210395.jpg", "caption": "children are eating salad in a cafeteria with adults", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210401.jpg", "caption": "a motorcycle parked in front of a restaurant", "annotations": [{"polygon": [[80, 211], [105, 210], [98, 64], [69, 66]], "text": "01", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "O-", "recog_valid": false, "glyph_recog_text": "or", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[190, 121], [186, 134], [186, 149], [190, 158], [241, 164], [244, 131]], "text": "OPEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OPEN", "recog_valid": true, "glyph_recog_text": "OPEN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472549.jpg", "caption": "a group of motorcycles parked outside a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079345.jpg", "caption": "a man in the air on skis", "annotations": [{"polygon": [[291, 259], [292, 265], [349, 240], [348, 234]], "text": "ROSSLENOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONSEHT", "recog_valid": false, "glyph_recog_text": "ryunieuy.", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210471.jpg", "caption": "a woman sitting at a desk with a computer in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210472.jpg", "caption": "a man and woman with a baby sitting on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472620.jpg", "caption": "a desk with a laptop and a coffee mug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472622.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210480.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[377, 116], [395, 181], [181, 277], [168, 211], [357, 118]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[273, 323], [279, 349], [327, 337], [322, 311]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[268, 351], [268, 351], [264, 332], [230, 340], [227, 365]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALL", "recog_valid": true, "glyph_recog_text": "ALL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079441.jpg", "caption": "a table with a bunch of bottles and glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472659.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079486.jpg", "caption": "a man wearing a hat", "annotations": [{"polygon": [[2, 388], [8, 391], [20, 403], [33, 416], [44, 419], [52, 425], [52, 425], [42, 436], [0, 416], [0, 416]], "text": "REJECT", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "FJE", "recog_valid": false, "glyph_recog_text": "EJECT", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210568.jpg", "caption": "a teddy bear and a clock on a bed", "annotations": [{"polygon": [[129, 301], [188, 267], [192, 274], [134, 307]], "text": "LIBERTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIBERTY", "recog_valid": true, "glyph_recog_text": "上IKRRIT", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079498.jpg", "caption": "a truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210580.jpg", "caption": "a man standing next to a motorcycle on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341663.jpg", "caption": "a person holding a remote control in front of a television", "annotations": [{"polygon": [[506, 408], [496, 428], [457, 409], [472, 389]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "pu", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[328, 342], [306, 370], [291, 365], [312, 340]], "text": "VOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VO", "recog_valid": false, "glyph_recog_text": "VOL", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079528.jpg", "caption": "a window with a watch on it", "annotations": [{"polygon": [[239, 347], [405, 334], [416, 295], [271, 300]], "text": "MASHU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MASHU", "recog_valid": true, "glyph_recog_text": "MASHU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079530.jpg", "caption": "a stop sign with a man on it", "annotations": [{"polygon": [[251, 139], [361, 132], [365, 184], [305, 187], [251, 184], [247, 173], [245, 151], [245, 139]], "text": "STTOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STCP", "recog_valid": false, "glyph_recog_text": "STTOP", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472749.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341695.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079563.jpg", "caption": "an old blue truck with its hood open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079567.jpg", "caption": "a man in a white shirt and tie standing in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341714.jpg", "caption": "a stack of books on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341736.jpg", "caption": "a view of a street at dusk from inside a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210672.jpg", "caption": "a pizza box sitting on top of a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079603.jpg", "caption": "a young girl on a skateboard in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079613.jpg", "caption": "a chocolate shoe cake with a bow on top", "annotations": [{"polygon": [[183, 310], [217, 306], [221, 307], [234, 319], [233, 328], [226, 340], [221, 340], [214, 333], [197, 332], [183, 336], [179, 336], [173, 333], [170, 328], [172, 322], [173, 317], [176, 314]], "text": "cake", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cane", "recog_valid": false, "glyph_recog_text": "cake", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[312, 310], [362, 306], [374, 310], [385, 318], [386, 328], [368, 341], [307, 337], [301, 333], [299, 327], [301, 319], [303, 315], [307, 312]], "text": "Cookie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cookie", "recog_valid": true, "glyph_recog_text": "Cookie", "glyph_recog_ld": 1.0}, {"polygon": [[398, 306], [390, 338], [509, 343], [509, 318]], "text": "Company", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Company", "recog_valid": true, "glyph_recog_text": "Company", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472831.jpg", "caption": "a bathroom with a toilet, sink and shower curtain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341781.jpg", "caption": "a young girl in a green shirt is making pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079645.jpg", "caption": "a green and yellow train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210752.jpg", "caption": "a table with a variety of desserts on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079684.jpg", "caption": "a baseball player with a bat on the field", "annotations": [{"polygon": [[323, 241], [344, 218], [355, 211], [368, 210], [385, 218], [387, 240], [394, 225], [396, 198], [389, 182], [373, 173], [356, 166], [342, 169], [327, 182], [314, 192], [300, 204], [298, 214], [318, 240]], "text": "CRAWFOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EAUO", "recog_valid": false, "glyph_recog_text": "cnuoron", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[338, 318], [362, 319], [373, 277], [371, 223], [346, 227], [337, 291]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GV", "recog_valid": false, "glyph_recog_text": "~", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079710.jpg", "caption": "a sign for a deli and a sign for steel", "annotations": [{"polygon": [[289, 179], [340, 152], [346, 169], [293, 198]], "text": "DELL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DELI", "recog_valid": false, "glyph_recog_text": "DELL", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[275, 214], [378, 157], [380, 178], [279, 233]], "text": "GROCERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GROCERY", "recog_valid": true, "glyph_recog_text": "GROCERY", "glyph_recog_ld": 1.0}, {"polygon": [[437, 325], [442, 346], [458, 337], [470, 341], [483, 357], [485, 342], [472, 328], [453, 320]], "text": "CAMEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CRAME", "recog_valid": false, "glyph_recog_text": "CAMEL", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210813.jpg", "caption": "a woman in a robe standing next to a garage door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472968.jpg", "caption": "a woman with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210845.jpg", "caption": "a group of children eating pizza at a birthday party", "annotations": [{"polygon": [[25, 363], [45, 382], [17, 410], [6, 381]], "text": "ALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "心", "recog_valid": false, "glyph_recog_text": "ALL", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[41, 382], [50, 392], [23, 427], [19, 424], [19, 409]], "text": "TAX", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "", "recog_valid": false, "glyph_recog_text": "TAX", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210849.jpg", "caption": "an old black and white photo of a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341922.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210861.jpg", "caption": "a semi truck with a man on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210881.jpg", "caption": "a clock tower is seen in the middle of a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473029.jpg", "caption": "two women are preparing food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473040.jpg", "caption": "a large airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341985.jpg", "caption": "a red apple, an orange and a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342005.jpg", "caption": "a yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473082.jpg", "caption": "a woman and a man standing next to a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210956.jpg", "caption": "a cat laying on a table next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079896.jpg", "caption": "a black and white photo of a bathroom with a toilet and ladder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210983.jpg", "caption": "a clock in a chicken coop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473134.jpg", "caption": "a tray with bowls of fruit and chocolate", "annotations": [{"polygon": [[440, 268], [449, 288], [514, 266], [509, 244]], "text": "USTR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HISO", "recog_valid": false, "glyph_recog_text": "USTR", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473137.jpg", "caption": "a laptop computer and keyboard sitting on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342086.jpg", "caption": "two men posing for a picture with a christmas tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079949.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211030.jpg", "caption": "a grandfather clock in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211033.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342122.jpg", "caption": "a stop sign has texts written on it", "annotations": [{"polygon": [[287, 313], [366, 323], [365, 367], [290, 357]], "text": "WAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAR", "recog_valid": true, "glyph_recog_text": "WAR", "glyph_recog_ld": 1.0}, {"polygon": [[183, 181], [436, 227], [424, 319], [161, 276]], "text": "STOP ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[209, 302], [272, 312], [269, 353], [208, 343]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211051.jpg", "caption": "a man sitting on the floor next to a guitar amp", "annotations": [{"polygon": [[135, 42], [145, 6], [165, 29], [193, 25], [198, 19], [225, 0], [228, 0], [235, 32], [268, 26], [284, 46], [286, 56], [250, 64], [256, 83], [225, 86], [211, 80], [163, 91]], "text": "LUKOIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "fuk", "recog_valid": false, "glyph_recog_text": "LUKOIL", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473195.jpg", "caption": "a black train engine pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342132.jpg", "caption": "a truck is parked in front of a building", "annotations": [{"polygon": [[169, 216], [168, 253], [244, 247], [244, 214]], "text": "Retrograde", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Retrograde", "recog_valid": true, "glyph_recog_text": "Retrograde", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342135.jpg", "caption": "a girl in a white uniform playing soccer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342141.jpg", "caption": "a large jet airplane taking off from an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342174.jpg", "caption": "a man sitting on a couch with a dog", "annotations": [{"polygon": [[436, 419], [487, 395], [484, 380], [431, 406]], "text": "2111365", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "211/365", "recog_valid": false, "glyph_recog_text": "2111368", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[478, 426], [510, 410], [505, 396], [475, 411], [475, 415]], "text": "2008", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2008", "recog_valid": true, "glyph_recog_text": "2008", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080065.jpg", "caption": "a living room with a couch, a table, a chair, a computer and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080078.jpg", "caption": "a display of bananas and other fruits in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342234.jpg", "caption": "a man sitting on a bench in front of a closed door", "annotations": [{"polygon": [[166, 12], [166, 50], [227, 50], [229, 14]], "text": "Le", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Le", "recog_valid": true, "glyph_recog_text": "Le", "glyph_recog_ld": 1.0}, {"polygon": [[241, 12], [241, 50], [335, 53], [333, 15]], "text": "Bar", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Bar", "recog_valid": true, "glyph_recog_text": "Bar", "glyph_recog_ld": 1.0}, {"polygon": [[347, 14], [343, 52], [377, 52], [376, 16]], "text": "a", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "a", "recog_valid": true, "glyph_recog_text": "a", "glyph_recog_ld": 1.0}, {"polygon": [[389, 16], [390, 52], [425, 53], [425, 16]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "N", "recog_valid": true, "glyph_recog_text": "N", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342259.jpg", "caption": "a person is cutting a pizza with a knife", "annotations": [{"polygon": [[100, 265], [102, 281], [83, 292], [63, 297], [35, 296], [28, 294], [23, 279], [59, 283], [85, 276]], "text": "TABASCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TABASC", "recog_valid": false, "glyph_recog_text": "TABASCO", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211190.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[330, 319], [293, 302], [294, 330], [328, 350]], "text": "18", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "18", "recog_valid": true, "glyph_recog_text": "18", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473347.jpg", "caption": "a blue fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211205.jpg", "caption": "a man and woman with a child in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342277.jpg", "caption": "a baseball player in purple and black uniform holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080140.jpg", "caption": "the old wooden train is parked at the station", "annotations": [{"polygon": [[324, 300], [378, 322], [381, 330], [381, 335], [369, 336], [324, 314], [321, 303]], "text": "THIRD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THIRD", "recog_valid": true, "glyph_recog_text": "THIRO", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080147.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[181, 17], [181, 17], [301, 45], [305, 80], [180, 52]], "text": "RAHW", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "RAHW", "recog_valid": true, "glyph_recog_text": "RAHW", "glyph_recog_ld": 1.0}, {"polygon": [[200, 124], [200, 124], [292, 57], [296, 110], [200, 175]], "text": "SKELTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SKELTON", "recog_valid": true, "glyph_recog_text": "SKELTON", "glyph_recog_ld": 1.0}, {"polygon": [[186, 302], [319, 319], [329, 337], [324, 354], [305, 388], [183, 378], [168, 365], [166, 357], [172, 305]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[207, 384], [223, 382], [240, 383], [290, 388], [319, 400], [321, 418], [303, 430], [289, 430], [274, 430], [264, 426], [237, 428], [227, 428], [207, 429]], "text": "MYSPACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MNSPAC", "recog_valid": false, "glyph_recog_text": "MYSPACE", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473375.jpg", "caption": "two people riding snowboards down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473384.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080180.jpg", "caption": "a man on a skateboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342325.jpg", "caption": "a table with a cup of coffee and a plate of bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080187.jpg", "caption": "a batter is standing at home plate ready to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342336.jpg", "caption": "a large white boat docked at the shore", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342363.jpg", "caption": "a plate of food with broccoli, potatoes, and green beans", "annotations": [{"polygon": [[469, 321], [468, 286], [505, 293], [512, 294], [511, 318]], "text": "AC", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AC", "recog_valid": true, "glyph_recog_text": "AC", "glyph_recog_ld": 1.0}, {"polygon": [[448, 330], [454, 327], [480, 379], [476, 385]], "text": "AKUNIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AKUNIN", "recog_valid": true, "glyph_recog_text": "44y4:4", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[406, 249], [418, 239], [417, 249], [444, 301], [438, 310]], "text": "ACHILLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "NOUIULES", "recog_valid": false, "glyph_recog_text": "ACHILLES", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211302.jpg", "caption": "a kitchen with a table and chairs in it", "annotations": [{"polygon": [[401, 261], [410, 251], [425, 202], [409, 192], [396, 240], [399, 246], [389, 258]], "text": "MIGIK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "290", "recog_valid": false, "glyph_recog_text": "MIGIK", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211322.jpg", "caption": "a large group of sheep in a barn with a sign on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211327.jpg", "caption": "a boy on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342408.jpg", "caption": "a small dog is standing under an umbrella", "annotations": [{"polygon": [[290, 443], [365, 456], [369, 467], [286, 478], [270, 468]], "text": "Luminaria", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Jenienaria", "recog_valid": false, "glyph_recog_text": "Luminaria", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473491.jpg", "caption": "a coffee cup with a mouse on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211351.jpg", "caption": "a horse and carriage on the street with people walking by", "annotations": [{"polygon": [[416, 328], [409, 380], [442, 430], [454, 429], [458, 375]], "text": "466", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "466", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473495.jpg", "caption": "a street sign with a red and white sign", "annotations": [{"polygon": [[134, 141], [134, 141], [219, 133], [226, 168], [132, 179]], "text": "3,9m", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3.9m", "recog_valid": false, "glyph_recog_text": "3,9m", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080279.jpg", "caption": "a woman in green shirt and white skirt playing tennis", "annotations": [{"polygon": [[204, 240], [209, 243], [230, 260], [230, 266], [217, 276], [190, 252]], "text": "IRish", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IRish", "recog_valid": true, "glyph_recog_text": "IRish", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080283.jpg", "caption": "a red train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473503.jpg", "caption": "a clock tower with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473508.jpg", "caption": "a woman holding up two laptops and a tablet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080293.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080295.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[89, 357], [90, 381], [152, 391], [229, 371], [163, 359], [105, 354]], "text": "daughter", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Heocakeu", "recog_valid": false, "glyph_recog_text": "daughter", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080305.jpg", "caption": "two men sitting at a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211381.jpg", "caption": "a black and white photo of a train at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342459.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473531.jpg", "caption": "a horse standing next to a car in front of a store", "annotations": [{"polygon": [[307, 212], [271, 273], [513, 260], [513, 216]], "text": "Knudse", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Knudes", "recog_valid": false, "glyph_recog_text": "Knudse", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[329, 423], [288, 392], [513, 385], [512, 423]], "text": "OP", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "OP", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211410.jpg", "caption": "a fire hydrant with a black and white top", "annotations": [{"polygon": [[284, 331], [283, 383], [238, 392], [240, 339]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CC", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211436.jpg", "caption": "a clock on a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080386.jpg", "caption": "a line of donuts being made on a conveyor belt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342532.jpg", "caption": "a man on a motorcycle in the street", "annotations": [{"polygon": [[203, 110], [216, 158], [220, 158], [240, 122], [233, 63]], "text": "GLH", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "52", "recog_valid": false, "glyph_recog_text": "①一工", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080389.jpg", "caption": "a young boy standing in front of a sign with bicycles on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473607.jpg", "caption": "a baseball player is running to home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080397.jpg", "caption": "a hotel room with a television, desk, and mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473616.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211485.jpg", "caption": "a bunch of teddy bears with british flags", "annotations": [{"polygon": [[260, 121], [284, 166], [274, 175], [249, 130]], "text": "Bear", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BPrighton", "recog_valid": false, "glyph_recog_text": "Bear", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[267, 106], [299, 163], [290, 170], [259, 114]], "text": "Brighton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ori.onal", "recog_valid": false, "glyph_recog_text": "Briohto", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[151, 186], [192, 197], [228, 204], [251, 206], [251, 190], [221, 185], [200, 181], [171, 176], [155, 173]], "text": "BRIGHTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRIGHTO", "recog_valid": false, "glyph_recog_text": "BRIGHTON", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[222, 419], [234, 430], [255, 402], [289, 366], [277, 359], [244, 392]], "text": "BRIGHTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BRIGHTC", "recog_valid": false, "glyph_recog_text": "BRIGHTON", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080422.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473642.jpg", "caption": "a red and black train on a gravel path", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080431.jpg", "caption": "a red fire truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080441.jpg", "caption": "a man walking in front of a large building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080448.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080471.jpg", "caption": "a black and white photo of people walking on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080475.jpg", "caption": "a small child standing next to a red fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211546.jpg", "caption": "a man and woman on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342693.jpg", "caption": "two sheep in a pen with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473773.jpg", "caption": "a group of people sitting on the snow with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473779.jpg", "caption": "a figurine of a teddy bear wearing a hat and holding a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080566.jpg", "caption": "a computer monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080582.jpg", "caption": "two benches sitting next to each other with a sign that says refuge", "annotations": [{"polygon": [[307, 155], [304, 188], [358, 190], [380, 191], [384, 161], [342, 158]], "text": "RUSPIDGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RUSPIDCE", "recog_valid": false, "glyph_recog_text": "RUSPIDGE", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[59, 356], [55, 391], [105, 396], [105, 373], [102, 357], [93, 349], [78, 346], [69, 346]], "text": "GWR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "店", "recog_valid": false, "glyph_recog_text": "GWR", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[199, 320], [192, 332], [191, 354], [236, 363], [239, 344], [233, 327], [229, 320], [222, 316], [210, 316]], "text": "GWG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GN", "recog_valid": false, "glyph_recog_text": "GWG", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211678.jpg", "caption": "a male tennis player in blue shirt and white shorts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342757.jpg", "caption": "a boat in the water with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342797.jpg", "caption": "a stop sign with stickers on it", "annotations": [{"polygon": [[185, 142], [185, 142], [195, 141], [286, 143], [285, 193], [182, 186]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[196, 349], [276, 353], [276, 376], [275, 378], [198, 380]], "text": "SOUTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOUTH", "recog_valid": true, "glyph_recog_text": "SOUTH", "glyph_recog_ld": 1.0}, {"polygon": [[127, 172], [127, 196], [186, 190], [182, 161]], "text": "FOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FOO", "recog_valid": false, "glyph_recog_text": "FOD", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[377, 94], [379, 132], [426, 132], [426, 94]], "text": "MA", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MA", "recog_valid": true, "glyph_recog_text": "MA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473870.jpg", "caption": "a group of people standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473891.jpg", "caption": "a double decker bus parked on the side of the road", "annotations": [{"polygon": [[250, 352], [252, 371], [262, 365], [269, 363], [285, 368], [286, 359], [269, 341]], "text": "ELC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ll.", "recog_valid": false, "glyph_recog_text": "ELC", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080686.jpg", "caption": "a motorcycle racer is waving to the crowd", "annotations": [{"polygon": [[41, 101], [39, 101], [377, 88], [394, 140], [390, 165], [20, 181]], "text": "MOTUL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MOTUL", "recog_valid": true, "glyph_recog_text": "MOTUL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342862.jpg", "caption": "a pizza in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211807.jpg", "caption": "a plane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080742.jpg", "caption": "a man in a chef's uniform holding a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211830.jpg", "caption": "a double decker bus is stopped in the middle of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211863.jpg", "caption": "a boy riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080792.jpg", "caption": "a car covered in animals and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211867.jpg", "caption": "a vintage photo of a parade with a truck and people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211868.jpg", "caption": "a tennis court with two people playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474012.jpg", "caption": "a tram at a train station with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211874.jpg", "caption": "a man walking with a suitcase at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342969.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211905.jpg", "caption": "genius at work desk lamp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474062.jpg", "caption": "a woman playing tennis on a court", "annotations": [{"polygon": [[27, 196], [26, 232], [140, 235], [142, 199]], "text": "delta", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "delta", "recog_valid": true, "glyph_recog_text": "delta", "glyph_recog_ld": 1.0}, {"polygon": [[150, 199], [149, 242], [265, 245], [265, 204]], "text": "lloyd", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "lloy", "recog_valid": false, "glyph_recog_text": "lloyd", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[298, 204], [289, 238], [513, 244], [513, 210]], "text": "PROXIMU", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PROXIMU", "recog_valid": true, "glyph_recog_text": "PROXIMU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080853.jpg", "caption": "a dog sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342998.jpg", "caption": "a street sign pointing to different directions in arabic", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211927.jpg", "caption": "a cat laying on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343002.jpg", "caption": "a woman eating a sandwich", "annotations": [{"polygon": [[441, 496], [454, 471], [409, 435], [391, 454], [413, 475]], "text": "AQUAFINA", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "AOUAFIA", "recog_valid": false, "glyph_recog_text": "AQUAFINA", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[94, 269], [113, 296], [140, 278], [173, 226], [152, 203]], "text": "VOLS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOLN", "recog_valid": false, "glyph_recog_text": "VOLS", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211935.jpg", "caption": "showboat casino las vegas", "annotations": [{"polygon": [[338, 194], [338, 194], [454, 185], [454, 185], [455, 161], [455, 161], [338, 171]], "text": "SHOWBOAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SHOWBOAT", "recog_valid": true, "glyph_recog_text": "SHOWBOAT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474084.jpg", "caption": "a living room with a couch, a television and a rug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211948.jpg", "caption": "a man paddles through rapids on a kayak", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343035.jpg", "caption": "a street sign on a pole in front of a house", "annotations": [{"polygon": [[240, 183], [278, 205], [276, 220], [238, 200]], "text": "ONTARO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONTARIO", "recog_valid": false, "glyph_recog_text": "ONTARO", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211963.jpg", "caption": "a truck with graffiti on it", "annotations": [{"polygon": [[271, 229], [270, 329], [378, 306], [377, 232]], "text": "TeTRis", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "恤", "recog_valid": false, "glyph_recog_text": "TeTRis", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343048.jpg", "caption": "a stop sign and street signs on a pole", "annotations": [{"polygon": [[248, 200], [247, 216], [304, 230], [304, 218], [305, 218], [306, 219], [307, 219], [307, 215]], "text": "FRONT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRONT", "recog_valid": true, "glyph_recog_text": "FRONT", "glyph_recog_ld": 1.0}, {"polygon": [[249, 195], [249, 195], [295, 163], [296, 177], [252, 208]], "text": "URTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NURTH", "recog_valid": false, "glyph_recog_text": "URTH", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211983.jpg", "caption": "a pair of scissors in a box with a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080937.jpg", "caption": "a man is standing next to a small train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474155.jpg", "caption": "a birthday cake with candles on it", "annotations": [{"polygon": [[318, 400], [394, 395], [393, 457], [327, 448]], "text": "38", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "38", "recog_valid": true, "glyph_recog_text": "38", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474161.jpg", "caption": "a man in purple shirt and purple shorts is about to hit a tennis ball", "annotations": [{"polygon": [[89, 86], [137, 71], [179, 69], [225, 88], [223, 121], [182, 119], [137, 121], [126, 123], [88, 122]], "text": "edEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "edEx", "recog_valid": true, "glyph_recog_text": "edEx", "glyph_recog_ld": 1.0}, {"polygon": [[396, 72], [395, 121], [425, 123], [425, 72]], "text": "F", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LC", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080950.jpg", "caption": "a large teddy bear sitting in a wagon", "annotations": [{"polygon": [[242, 147], [245, 227], [375, 231], [373, 155]], "text": "rrods", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "smeds", "recog_valid": false, "glyph_recog_text": "rrods", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[85, 247], [84, 277], [128, 282], [129, 252]], "text": "Harrods", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "fleovels", "recog_valid": false, "glyph_recog_text": "Harrods", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212025.jpg", "caption": "a person is holding a cat in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343108.jpg", "caption": "a fighter jet is parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474180.jpg", "caption": "a group of boats are docked on the river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080980.jpg", "caption": "a group of cows standing on the side of the road", "annotations": [{"polygon": [[39, 176], [39, 176], [94, 175], [97, 203], [39, 215], [35, 185]], "text": "STD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STE", "recog_valid": false, "glyph_recog_text": "STD", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212070.jpg", "caption": "pink umbrella at the pink parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474224.jpg", "caption": "a vase with flowers sitting on a window sill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343156.jpg", "caption": "a green bus driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343158.jpg", "caption": "a motorcycle is on display in a room with other motorcycles", "annotations": [{"polygon": [[208, 359], [300, 376], [285, 389], [191, 371]], "text": "AVON ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AVON", "recog_valid": false, "glyph_recog_text": "AVON", "glyph_recog_ld": 1.0}, {"polygon": [[49, 389], [74, 427], [96, 427], [97, 423], [64, 382]], "text": "conco", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "conco", "recog_valid": true, "glyph_recog_text": "conco", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343166.jpg", "caption": "united airlines boeing 737-800", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474255.jpg", "caption": "a football game with players on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474271.jpg", "caption": "a train with a blue and red train car on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343225.jpg", "caption": "a desk with a computer mouse, a book, and a toy car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081084.jpg", "caption": "a building with a clock on the side of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081102.jpg", "caption": "a man in a black jacket is walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474319.jpg", "caption": "two women holding surfboards on a beach", "annotations": [{"polygon": [[316, 195], [343, 258], [355, 254], [328, 190]], "text": "ST SOFTOPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOFTOPS", "recog_valid": false, "glyph_recog_text": "STSOFTOFE", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081107.jpg", "caption": "a street sign with a space needle in the background", "annotations": [{"polygon": [[230, 308], [293, 301], [296, 329], [222, 336]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}, {"polygon": [[323, 291], [352, 288], [356, 330], [325, 333]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[138, 306], [147, 346], [135, 359], [120, 362], [106, 358], [102, 339], [108, 324], [118, 312]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "6", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[150, 129], [152, 187], [254, 273], [254, 273], [257, 223], [150, 127]], "text": " 200Thomas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Thons", "recog_valid": false, "glyph_recog_text": "200Thomas", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474330.jpg", "caption": "a mannequin wearing a suit and bow tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081126.jpg", "caption": "a delta airlines airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212199.jpg", "caption": "a man walking by a train station with a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343279.jpg", "caption": "a white suv parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081135.jpg", "caption": "a group of people standing in a courtyard with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343284.jpg", "caption": "a bowl of flour on a counter top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474363.jpg", "caption": "a train is parked at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474378.jpg", "caption": "a street sign on a pole with a tree in the background", "annotations": [{"polygon": [[245, 121], [244, 144], [394, 135], [395, 113]], "text": "STEINER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STEINER", "recog_valid": true, "glyph_recog_text": "STEINER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474390.jpg", "caption": "air canada airbus a320-214", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474396.jpg", "caption": "a bus with people on it driving by palm trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474404.jpg", "caption": "a man wearing a red shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474405.jpg", "caption": "a tv on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081200.jpg", "caption": "an older couple sitting at a table with a cake", "annotations": [{"polygon": [[191, 375], [174, 333], [200, 325], [218, 368]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "lhpiea", "recog_valid": false, "glyph_recog_text": "Birthda", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343349.jpg", "caption": "a street with many umbrellas and colorful umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474423.jpg", "caption": "a baseball player catching a ball in mid air", "annotations": [{"polygon": [[205, 250], [193, 266], [220, 282], [236, 256], [230, 252]], "text": "44", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": false, "glyph_recog_text": "44", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[229, 221], [215, 237], [240, 254], [254, 238], [240, 223]], "text": "AMMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MOS", "recog_valid": false, "glyph_recog_text": "AAS", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474472.jpg", "caption": "a desk with a computer, a laptop, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212330.jpg", "caption": "a person kiteboarding in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343422.jpg", "caption": "people eating outside on a patio", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343425.jpg", "caption": "a bottle of orange juice and an orange", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474550.jpg", "caption": "a collage of photos showing different types of doughnuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474562.jpg", "caption": "a laptop computer sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212429.jpg", "caption": "a traffic light tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474614.jpg", "caption": "a korean bbq truck parked on the side of the road", "annotations": [{"polygon": [[158, 152], [155, 172], [240, 155], [243, 134]], "text": "KOREAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KOREAN", "recog_valid": true, "glyph_recog_text": "KOREAN", "glyph_recog_ld": 1.0}, {"polygon": [[254, 130], [252, 153], [313, 141], [312, 118]], "text": "BBQ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BBO", "recog_valid": false, "glyph_recog_text": "BBQ", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[325, 114], [333, 137], [427, 122], [435, 116], [440, 102], [436, 94], [429, 91]], "text": "TACO", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "TACO", "recog_valid": true, "glyph_recog_text": "TACO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081401.jpg", "caption": "a group of men playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474622.jpg", "caption": "a stop sign is sitting in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081416.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474637.jpg", "caption": "a desk with a computer monitor, a keyboard, and a bottle of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474642.jpg", "caption": "two men in uniform standing on a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474656.jpg", "caption": "a neon sign with a baseball player and a clock", "annotations": [{"polygon": [[386, 283], [393, 295], [472, 253], [467, 241]], "text": "THE ORIGINAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "THOTOG31", "recog_valid": false, "glyph_recog_text": "THE DRIGINAL", "glyph_recog_ld": 0.25000062499947917}, {"polygon": [[21, 216], [23, 265], [148, 259], [146, 221]], "text": "Dorona", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Cyorona", "recog_valid": false, "glyph_recog_text": "Dorona", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[345, 207], [350, 286], [511, 219], [500, 191]], "text": "Philippe", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Phabne", "recog_valid": false, "glyph_recog_text": "Philippe", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343596.jpg", "caption": "a golden clock sitting on top of a building", "annotations": [{"polygon": [[353, 451], [354, 481], [427, 486], [429, 459]], "text": "Informa", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Informal", "recog_valid": false, "glyph_recog_text": "Informa", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[106, 451], [108, 481], [257, 478], [253, 450]], "text": "Information", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Information", "recog_valid": true, "glyph_recog_text": "Information", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343619.jpg", "caption": "a man playing tennis on a tennis court", "annotations": [{"polygon": [[115, 37], [121, 54], [244, 32], [244, 13]], "text": "China Construction Bank", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "中国建设银行", "recog_valid": false, "glyph_recog_text": "China Constudtion Bank", "glyph_recog_ld": 0.09090950413204357}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212574.jpg", "caption": "three police officers standing next to a stop sign", "annotations": [{"polygon": [[198, 63], [287, 69], [283, 109], [194, 102]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081504.jpg", "caption": "a table with a plate of food and a bottle of juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212589.jpg", "caption": "a black and white photo of an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343691.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474769.jpg", "caption": "an old photo of people at a fair with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081556.jpg", "caption": "a train traveling down the tracks with a yellow and black train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081585.jpg", "caption": "a green motorcycle is on display at a show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474817.jpg", "caption": "a group of toilets with different designs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474832.jpg", "caption": "a street sign with a stop sign and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212690.jpg", "caption": "three toilets sitting on the curb", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212712.jpg", "caption": "an old black steam engine train sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212715.jpg", "caption": "a man wearing a tie and a sweater", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081654.jpg", "caption": "a double decker bus drives past the big ben clock tower in london", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474882.jpg", "caption": "a small yellow airplane parked on the runway", "annotations": [{"polygon": [[98, 331], [112, 348], [187, 326], [176, 310]], "text": "N88134", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N88134", "recog_valid": true, "glyph_recog_text": "N88134", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474887.jpg", "caption": "a fighter jet is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343815.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081683.jpg", "caption": "a close up of a white cow with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212756.jpg", "caption": "a suitcase with clothes inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212757.jpg", "caption": "two men playing golf on a field with a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343849.jpg", "caption": "a laptop sitting on a table with a notebook and coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212780.jpg", "caption": "a cat is sitting on a bench outside of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212782.jpg", "caption": "a stop sign with a sticker on it", "annotations": [{"polygon": [[177, 123], [177, 123], [172, 185], [172, 185], [192, 186], [192, 186], [311, 179], [311, 179], [338, 132], [338, 132], [330, 112], [330, 112]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343857.jpg", "caption": "a bird standing on a log in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343881.jpg", "caption": "a red toy car on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343892.jpg", "caption": "a woman riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343905.jpg", "caption": "a small airplane flying through the air with smoke coming out of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081768.jpg", "caption": "a busy street with two double decker buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212863.jpg", "caption": "the nelson airship museum", "annotations": [{"polygon": [[130, 201], [125, 216], [130, 217], [134, 214], [150, 207], [153, 211], [158, 207], [158, 203], [170, 197], [170, 192], [172, 187], [174, 185], [175, 181], [178, 176], [173, 174], [164, 185], [159, 188], [161, 184], [159, 183], [156, 189], [138, 198], [140, 192], [138, 191], [134, 195]], "text": "Overnight", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Geaai", "recog_valid": false, "glyph_recog_text": "Ovemigh", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[181, 177], [175, 195], [185, 191], [187, 186], [188, 188], [190, 189], [220, 176], [227, 160], [229, 158], [228, 172], [227, 176], [225, 178], [226, 181], [229, 182], [232, 178], [235, 162], [241, 150], [241, 148], [209, 164], [214, 158], [213, 153], [211, 153], [199, 171], [195, 170], [198, 163], [194, 162], [188, 168]], "text": "Delivery", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Geiiery", "recog_valid": false, "glyph_recog_text": "Delivery", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[245, 153], [241, 155], [243, 163], [246, 164], [256, 171], [261, 166], [264, 154], [270, 154], [276, 151], [327, 131], [335, 106], [333, 103], [331, 103], [296, 121], [294, 121], [300, 113], [299, 111], [296, 112], [288, 125], [264, 136], [267, 129], [265, 127], [255, 131], [253, 134]], "text": "System", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "jisren", "recog_valid": false, "glyph_recog_text": "System", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212868.jpg", "caption": "a black cat with a backpack on", "annotations": [{"polygon": [[334, 141], [352, 162], [371, 181], [387, 202], [396, 200], [404, 193], [395, 180], [389, 173], [382, 167], [370, 153], [360, 144], [349, 132], [344, 129], [339, 132], [342, 137], [335, 141]], "text": "SHEBA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "V33HS", "recog_valid": false, "glyph_recog_text": "SHEBA", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343969.jpg", "caption": "two men in military uniforms cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212920.jpg", "caption": "a group of people on a street with a band", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212934.jpg", "caption": "four baseball players standing in front of a net", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212941.jpg", "caption": "a white car parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081885.jpg", "caption": "a group of military men and women standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475103.jpg", "caption": "a baseball player is throwing a ball to a catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212963.jpg", "caption": "a table with a variety of food and wine glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344040.jpg", "caption": "a man sitting at a table with a small airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212970.jpg", "caption": "a tray of hot dogs on a counter", "annotations": [{"polygon": [[1, 207], [31, 235], [12, 240], [-1, 225]], "text": "SCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "io", "recog_valid": false, "glyph_recog_text": "300", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475129.jpg", "caption": "two horses standing in a field with trees in the background", "annotations": [{"polygon": [[166, 418], [267, 410], [241, 456], [157, 454]], "text": "Haya", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Haeeoodt", "recog_valid": false, "glyph_recog_text": "H a yPa", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[270, 406], [386, 406], [358, 459], [243, 459]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "lPiotsyruand", "recog_valid": false, "glyph_recog_text": "Paotography", "glyph_recog_ld": 0.33333388888842586}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344065.jpg", "caption": "a man standing in front of a fruit stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212998.jpg", "caption": "novak djokovic, the world number one, is the defending champion", "annotations": [{"polygon": [[413, 227], [401, 242], [391, 259], [381, 275], [387, 280], [394, 270], [403, 256], [412, 243], [419, 232]], "text": "AD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RAE", "recog_valid": false, "glyph_recog_text": "我心", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213010.jpg", "caption": "a cow with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344096.jpg", "caption": "a pigeon perched on a street sign", "annotations": [{"polygon": [[186, 302], [185, 292], [240, 271], [241, 282], [218, 294]], "text": "Buckingham", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Buckingham", "recog_valid": true, "glyph_recog_text": "Backinnghsann", "glyph_recog_ld": 0.6153849112423759}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213036.jpg", "caption": "a man and woman sitting on a boat in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213048.jpg", "caption": "a man holding a tie in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344126.jpg", "caption": "a woman in a blue dress playing tennis", "annotations": [{"polygon": [[100, 111], [100, 136], [30, 148], [29, 121]], "text": "CITIZEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CITIZEN", "recog_valid": true, "glyph_recog_text": "CITIZEN", "glyph_recog_ld": 1.0}, {"polygon": [[74, 68], [76, 94], [133, 84], [131, 63]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Emirates", "recog_valid": true, "glyph_recog_text": "Emirates", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475232.jpg", "caption": "a red bus is parked next to a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213107.jpg", "caption": "a young boy swinging a baseball bat on a field", "annotations": [{"polygon": [[297, 325], [302, 319], [351, 342], [347, 350]], "text": "RAPTOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ESR", "recog_valid": false, "glyph_recog_text": "RS多生OA", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344196.jpg", "caption": "a teddy bear and a heart are sitting on a rock", "annotations": [{"polygon": [[106, 381], [122, 385], [140, 375], [261, 385], [260, 357], [208, 356], [148, 354], [156, 330], [148, 323], [115, 343]], "text": "Grandad", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Srundad", "recog_valid": false, "glyph_recog_text": "Grandad", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213125.jpg", "caption": "two people riding skateboards down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213148.jpg", "caption": "a sandwich and chips on a table next to a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475306.jpg", "caption": "a piece of cake and a cup of coffee on a table", "annotations": [{"polygon": [[360, 66], [349, 77], [368, 88], [394, 102], [417, 109], [452, 119], [461, 109]], "text": "Comics", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "comics", "recog_valid": false, "glyph_recog_text": "Comics", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213172.jpg", "caption": "a desk with books, a lamp and a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213215.jpg", "caption": "a row of motorcycles parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213219.jpg", "caption": "a black and white photo of two electric toothbrushes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475370.jpg", "caption": "a boat traveling down a river with buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475415.jpg", "caption": "a girl in a white shirt and black shorts is hitting a tennis ball", "annotations": [{"polygon": [[261, 230], [337, 199], [343, 216], [327, 219], [313, 222], [284, 235], [270, 244]], "text": "STATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLEVELAND", "recog_valid": false, "glyph_recog_text": "STATE", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[285, 258], [308, 251], [323, 245], [336, 240], [337, 235], [333, 227], [279, 246]], "text": "TENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TENNIS", "recog_valid": true, "glyph_recog_text": "TENNIS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082216.jpg", "caption": "a man on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082225.jpg", "caption": "a man and woman standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082228.jpg", "caption": "two men sitting at a table with a glass of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344374.jpg", "caption": "a blue door with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213312.jpg", "caption": "a woman playing tennis on a court with people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213344.jpg", "caption": "three men in purple shirts cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213382.jpg", "caption": "a view of a street from the window of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475561.jpg", "caption": "a flip phone with a screen on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213419.jpg", "caption": "a black and white photo of two boats on the beach", "annotations": [{"polygon": [[123, 287], [136, 258], [194, 275], [184, 303]], "text": "MARLENHY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARLNEI", "recog_valid": false, "glyph_recog_text": "MAPLENPY", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475575.jpg", "caption": "a stuffed bear wearing a star trek shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475580.jpg", "caption": "a man walking down the street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344544.jpg", "caption": "a man is holding an umbrella in a field", "annotations": [{"polygon": [[76, 249], [91, 254], [98, 235], [106, 219], [109, 212], [121, 187], [116, 185], [110, 186], [103, 187], [98, 195], [82, 232], [77, 243]], "text": "ESPIRIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LPatedRE", "recog_valid": false, "glyph_recog_text": "ESPIRIT", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344578.jpg", "caption": "a white and green bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475656.jpg", "caption": "a stop sign on a road with mountains in the background", "annotations": [{"polygon": [[437, 214], [490, 216], [489, 203], [495, 203], [499, 198], [496, 188], [491, 183], [433, 180], [430, 186], [430, 192], [430, 204], [432, 211]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475693.jpg", "caption": "a military jet sitting on the runway with a grassy field behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213552.jpg", "caption": "a man in a darth vader costume standing in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213554.jpg", "caption": "a shelf with toothbrushes and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082480.jpg", "caption": "a pair of headphones and a mirror are on the floor", "annotations": [{"polygon": [[436, 100], [430, 126], [442, 127], [455, 129], [469, 135], [480, 139], [490, 116]], "text": "The", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "The", "recog_valid": true, "glyph_recog_text": "The", "glyph_recog_ld": 1.0}, {"polygon": [[434, 127], [428, 157], [441, 157], [454, 156], [468, 160], [484, 166], [493, 168], [500, 143]], "text": "Gree", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Gree", "recog_valid": true, "glyph_recog_text": "Gree", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082494.jpg", "caption": "a blender with a jar on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475731.jpg", "caption": "a man swinging a tennis racket on a tennis court", "annotations": [{"polygon": [[0, 153], [57, 151], [56, 121], [0, 124]], "text": "orld", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "orld", "recog_valid": true, "glyph_recog_text": "orld", "glyph_recog_ld": 1.0}, {"polygon": [[2, 106], [57, 105], [57, 65], [2, 65]], "text": "AS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "AS", "recog_valid": true, "glyph_recog_text": "AS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213596.jpg", "caption": "a bench with a sign on it in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344671.jpg", "caption": "a blue bicycle parked in front of a brick building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475763.jpg", "caption": "a small plane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213623.jpg", "caption": "a street corner with a fire hydrant and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475774.jpg", "caption": "a person sitting on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344720.jpg", "caption": "a city street with cars and buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344726.jpg", "caption": "a black and gold clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344729.jpg", "caption": "a table with three plates of pastries and a man using his cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475808.jpg", "caption": "a three tiered cake stand with cupcakes on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475810.jpg", "caption": "a traffic light and a pole with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344793.jpg", "caption": "a group of people on skis and snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475881.jpg", "caption": "a clock on a pole in front of a tall building", "annotations": [{"polygon": [[139, 222], [133, 229], [142, 234], [152, 235], [162, 231], [175, 222], [182, 214], [180, 205], [173, 213], [166, 220], [159, 225], [153, 226], [144, 225]], "text": "BUILDING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "BUIDING", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475883.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475915.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344853.jpg", "caption": "a man in jeans and a vest standing on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213790.jpg", "caption": "a man eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475939.jpg", "caption": "a table with bananas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475960.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[-1, 158], [0, 191], [73, 192], [76, 158]], "text": "Madrid", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Madrie", "recog_valid": false, "glyph_recog_text": "Madrid", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344893.jpg", "caption": "a man in a suit standing next to a pink food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213826.jpg", "caption": "a yellow bus is driving down a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082766.jpg", "caption": "a person riding skis down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476000.jpg", "caption": "a train engine with smoke coming out of the chimney", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344945.jpg", "caption": "a tablet computer sitting on top of a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476045.jpg", "caption": "a baseball player swinging at a pitch", "annotations": [{"polygon": [[150, 206], [150, 206], [165, 199], [191, 193], [195, 199], [198, 221], [160, 234]], "text": "26", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "26", "recog_valid": true, "glyph_recog_text": "26", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213902.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[184, 148], [182, 153], [197, 165], [214, 182], [219, 176], [204, 162], [196, 154], [190, 150]], "text": "Santa", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "slAeCte", "recog_valid": false, "glyph_recog_text": "¥电品", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[178, 159], [182, 154], [196, 165], [216, 182], [213, 188], [199, 176], [191, 169]], "text": "Skate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SKRTE SR", "recog_valid": false, "glyph_recog_text": "+e:..", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476054.jpg", "caption": "a man doing a trick on a skateboard in a bowling alley", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476060.jpg", "caption": "a green and blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344996.jpg", "caption": "a woman in a dress is standing next to a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344998.jpg", "caption": "a green truck with a boat on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476074.jpg", "caption": "a baseball player is standing at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345019.jpg", "caption": "a man and a woman laying on a bed reading books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345020.jpg", "caption": "a man riding a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476106.jpg", "caption": "a large green phone", "annotations": [{"polygon": [[131, 125], [131, 125], [131, 153], [214, 145], [213, 116]], "text": "cricket", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cricket", "recog_valid": true, "glyph_recog_text": "cricket", "glyph_recog_ld": 1.0}, {"polygon": [[14, 64], [13, 93], [106, 130], [110, 92], [54, 64]], "text": "WGN", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "WGN", "recog_valid": true, "glyph_recog_text": "WGN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345042.jpg", "caption": "a tug boat is in the water near a log", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476125.jpg", "caption": "a woman and man sitting at a table with a plate of food", "annotations": [{"polygon": [[26, 283], [38, 292], [19, 324], [7, 314]], "text": "LIFE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LIFE", "recog_valid": true, "glyph_recog_text": "LIFE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213999.jpg", "caption": "a table with cupcakes and bananas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476160.jpg", "caption": "a white bus driving down a street with trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214024.jpg", "caption": "a small bedroom with a desk and a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214036.jpg", "caption": "a street sign with a picture of a car and a sign has texts", "annotations": [{"polygon": [[153, 119], [250, 107], [249, 130], [209, 141], [154, 142]], "text": "Hoheitsgebiet", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Haheitsgebiet", "recog_valid": false, "glyph_recog_text": "Hoheitsgebie", "glyph_recog_ld": 0.8461539644969504}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476188.jpg", "caption": "a woman is opening the oven door to check on a dish", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345118.jpg", "caption": "a teddy bear cake", "annotations": [{"polygon": [[223, 334], [323, 348], [321, 325], [224, 316]], "text": "Rachel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "piichol", "recog_valid": false, "glyph_recog_text": "Rachel", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476198.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345142.jpg", "caption": "a man is playing tennis on a court", "annotations": [{"polygon": [[302, 346], [404, 340], [382, 247], [323, 251], [321, 258], [322, 275], [324, 316], [300, 321]], "text": "Beneficial", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "置", "recog_valid": false, "glyph_recog_text": "Bonefcidl", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345167.jpg", "caption": "a large red truck with chains attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345188.jpg", "caption": "a man sitting on a couch", "annotations": [{"polygon": [[360, 171], [373, 160], [386, 150], [399, 142], [414, 137], [433, 133], [437, 142], [420, 142], [410, 148], [399, 152], [390, 160], [375, 169], [365, 179]], "text": "COUGARS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "sa19003", "recog_valid": false, "glyph_recog_text": "COUGARS", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214165.jpg", "caption": "a traffic light with a street sign on it", "annotations": [{"polygon": [[111, 187], [147, 154], [162, 173], [126, 202]], "text": "Van", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Van", "recog_valid": true, "glyph_recog_text": "Van", "glyph_recog_ld": 1.0}, {"polygon": [[162, 142], [215, 95], [227, 115], [176, 159]], "text": "Ness", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ness", "recog_valid": true, "glyph_recog_text": "Ness", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345265.jpg", "caption": "a hand holding a glass of wine with a label on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476341.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476344.jpg", "caption": "a woman sitting on a bench with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214209.jpg", "caption": "a train with graffiti on it is parked in front of a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214254.jpg", "caption": "a baseball game with a batter at bat and a pitcher on the mound", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214265.jpg", "caption": "president obama pins the medal of honor on james w mccabe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345357.jpg", "caption": "the belfry of the grote stad in brussels", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083219.jpg", "caption": "a green bus with a bicycle attached to the front", "annotations": [{"polygon": [[344, 99], [346, 121], [402, 130], [404, 110]], "text": "VULCAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "VULCAN", "recog_valid": true, "glyph_recog_text": "VULCAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345370.jpg", "caption": "a group of boats docked in a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476443.jpg", "caption": "a red truck driving down a road in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345391.jpg", "caption": "adams birds nest", "annotations": [{"polygon": [[145, 153], [132, 192], [173, 190], [169, 185], [244, 186], [244, 160], [232, 158], [164, 160], [164, 155]], "text": "ADAM'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ADAMS", "recog_valid": false, "glyph_recog_text": "ADAM'S", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[249, 153], [251, 190], [336, 185], [336, 162]], "text": "BERRIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BERRIES", "recog_valid": true, "glyph_recog_text": "BERRIES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476463.jpg", "caption": "a woman is smiling while holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083269.jpg", "caption": "a bus is being loaded onto a ferry boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345417.jpg", "caption": "a man sitting at a table with a hot dog and a plate of food", "annotations": [{"polygon": [[93, 58], [93, 58], [93, 84], [115, 84], [145, 86], [148, 56]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "HOT", "recog_valid": true, "glyph_recog_text": "HOT", "glyph_recog_ld": 1.0}, {"polygon": [[156, 53], [157, 83], [209, 82], [207, 49]], "text": "DOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DOG", "recog_valid": true, "glyph_recog_text": "DOG", "glyph_recog_ld": 1.0}, {"polygon": [[221, 49], [308, 45], [310, 74], [224, 77]], "text": "EATING", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "EATING", "recog_valid": true, "glyph_recog_text": "EATING", "glyph_recog_ld": 1.0}, {"polygon": [[92, 6], [111, 5], [114, 17], [120, 13], [143, 16], [148, 26], [141, 35], [134, 36], [131, 40], [95, 44]], "text": "3RD", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "3KD", "recog_valid": false, "glyph_recog_text": "3RD", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[164, 5], [158, 36], [163, 37], [270, 31], [271, 31], [269, 7], [211, 3], [174, 3]], "text": "ANNUAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ANNUAL", "recog_valid": true, "glyph_recog_text": "ANNUAL", "glyph_recog_ld": 1.0}, {"polygon": [[236, 355], [326, 335], [336, 371], [235, 399]], "text": "PEZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "PEZ", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[85, 173], [85, 210], [158, 229], [171, 189]], "text": "2009", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2009", "recog_valid": true, "glyph_recog_text": "2009", "glyph_recog_ld": 1.0}, {"polygon": [[101, 141], [98, 180], [155, 175], [158, 144]], "text": "4TH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ATH", "recog_valid": false, "glyph_recog_text": "4TH", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[88, 104], [85, 140], [178, 141], [226, 91]], "text": "JULY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "JULY", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[271, 2], [288, 32], [357, 31], [357, 4]], "text": "VEGG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "VEGG", "recog_valid": true, "glyph_recog_text": "VEGG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476500.jpg", "caption": "a double decker bus on a city street", "annotations": [{"polygon": [[434, 417], [350, 434], [395, 449], [477, 449], [512, 438], [512, 432]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "B U S", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[398, 407], [309, 427], [257, 413], [350, 395]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476501.jpg", "caption": "a man sitting on a bench with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214373.jpg", "caption": "a man holding a soccer ball in front of a camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476589.jpg", "caption": "a green leather case with scissors, tweezers and a pair of scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214450.jpg", "caption": "a stop sign with a sticker on it", "annotations": [{"polygon": [[231, 243], [374, 190], [379, 139], [404, 123], [415, 96], [420, 81], [414, 66], [386, 64], [237, 153], [230, 163], [226, 184], [220, 224], [223, 240]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOR", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[239, 267], [297, 251], [297, 235], [238, 252]], "text": "LAUGHING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LAUGHING", "recog_valid": true, "glyph_recog_text": "LAUGHING", "glyph_recog_ld": 1.0}, {"polygon": [[319, 245], [319, 228], [379, 209], [381, 226]], "text": "DANZIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DANZIG", "recog_valid": true, "glyph_recog_text": "DANZIG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345531.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[323, 159], [332, 157], [339, 152], [344, 147], [346, 147], [348, 147], [348, 150], [347, 150], [353, 165], [351, 173], [352, 177], [335, 183], [324, 185], [320, 182], [319, 176], [318, 170], [314, 162], [316, 157]], "text": "STAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STAV", "recog_valid": false, "glyph_recog_text": "STAY", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476609.jpg", "caption": "a stop sign in front of a tall building", "annotations": [{"polygon": [[359, 276], [455, 228], [461, 293], [367, 324]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214523.jpg", "caption": "two large trucks parked in a field near a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214524.jpg", "caption": "a small wooden model of a house on the beach", "annotations": [{"polygon": [[63, 382], [59, 399], [20, 415], [20, 400]], "text": "SAFEWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "SAFEWMY", "recog_valid": false, "glyph_recog_text": "SAFEWAY", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345608.jpg", "caption": "three motorcyclists racing on a race track", "annotations": [{"polygon": [[86, 0], [86, 55], [381, 58], [380, 0]], "text": "eaBull", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "eabul", "recog_valid": false, "glyph_recog_text": "eaBull", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083471.jpg", "caption": "a woman walking past a sign that says bridges place", "annotations": [{"polygon": [[159, 27], [156, 54], [234, 100], [236, 78]], "text": "BRYDGES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BRYDGES", "recog_valid": true, "glyph_recog_text": "BRYDGES", "glyph_recog_ld": 1.0}, {"polygon": [[155, 60], [153, 88], [208, 114], [209, 91]], "text": "PLACE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PLACE", "recog_valid": true, "glyph_recog_text": "PLACE", "glyph_recog_ld": 1.0}, {"polygon": [[217, 95], [213, 117], [245, 134], [247, 112]], "text": "WC2", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WC2", "recog_valid": true, "glyph_recog_text": "WC2", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345625.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345630.jpg", "caption": "a train on the tracks at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476706.jpg", "caption": "a large clock on the side of a building", "annotations": [{"polygon": [[0, 317], [74, 303], [76, 322], [-1, 336]], "text": "CHMUCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CHMUCK", "recog_valid": true, "glyph_recog_text": "CHMUCK", "glyph_recog_ld": 1.0}, {"polygon": [[188, 257], [269, 249], [309, 250], [355, 251], [356, 291], [315, 289], [279, 288], [249, 292], [189, 296]], "text": "KURZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KURZ", "recog_valid": true, "glyph_recog_text": "KURZ", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345655.jpg", "caption": "a teddy bear, a mouse, and a rabbit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083516.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214608.jpg", "caption": "a fire hydrant sitting on the sidewalk next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476761.jpg", "caption": "two parking meters are on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345691.jpg", "caption": "a dog wearing a birthday hat", "annotations": [{"polygon": [[348, 103], [335, 88], [386, 56], [424, 67], [425, 98], [410, 92], [384, 91], [366, 94]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "63", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[238, 167], [289, 128], [321, 108], [344, 99], [389, 93], [404, 95], [420, 99], [411, 169], [403, 153], [393, 142], [380, 146], [356, 150], [336, 158], [317, 177], [303, 191]], "text": "BRIHD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "小", "recog_valid": false, "glyph_recog_text": "BRIHD", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476812.jpg", "caption": "a black and white photo of a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345745.jpg", "caption": "a modern bathroom with a white tub and a black and white wall", "annotations": [{"polygon": [[20, 247], [20, 277], [70, 276], [70, 246]], "text": "07", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "07", "recog_valid": true, "glyph_recog_text": "07", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476826.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[103, 74], [510, 92], [504, 190], [95, 146]], "text": "J.P.Morga", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "J.PMorga", "recog_valid": false, "glyph_recog_text": "J.P.Morga", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083625.jpg", "caption": "a red surfboard on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345786.jpg", "caption": "a street with cars and a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476881.jpg", "caption": "a sandwich and a beer on a table", "annotations": [{"polygon": [[102, 55], [110, 93], [130, 90], [155, 80], [187, 80], [201, 79], [207, 87], [217, 88], [230, 79], [237, 55], [215, 46], [158, 47]], "text": "CORKY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "COSE", "recog_valid": false, "glyph_recog_text": "CORKY'S", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476901.jpg", "caption": "a view of a room through a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214764.jpg", "caption": "two cows standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345844.jpg", "caption": "a group of men sitting at a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214792.jpg", "caption": "a small airplane parked on the grass in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214801.jpg", "caption": "a refrigerator with magnets and pictures on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345880.jpg", "caption": "a highway sign with a green sign on top of it", "annotations": [{"polygon": [[341, 182], [339, 200], [466, 184], [467, 166]], "text": "Baltimore", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Baltimore", "recog_valid": true, "glyph_recog_text": "Baltimore", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476960.jpg", "caption": "a cat is eating a banana", "annotations": [{"polygon": [[278, 254], [434, 284], [434, 284], [461, 277], [407, 260], [298, 243], [298, 243], [280, 245]], "text": "Nelc", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "N e l c", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345914.jpg", "caption": "a man walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477015.jpg", "caption": "a horse drawn carriage is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214876.jpg", "caption": "two people riding motorcycles in the air over a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346004.jpg", "caption": "a bicycle with a box on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083862.jpg", "caption": "a sandwich on a plate with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477079.jpg", "caption": "two men playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346021.jpg", "caption": "four girls in school uniforms posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346041.jpg", "caption": "a small airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477137.jpg", "caption": "a cat sleeping on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214995.jpg", "caption": "a young boy bending over to hit a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477156.jpg", "caption": "a living room with a couch, a television, and a box", "annotations": [{"polygon": [[177, 205], [177, 221], [225, 258], [220, 235]], "text": "BRAVIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRAVIA", "recog_valid": true, "glyph_recog_text": "BRAVIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215033.jpg", "caption": "a woman wearing a bear hat in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477192.jpg", "caption": "a group of people standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477233.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346161.jpg", "caption": "a woman in a floral dress holding a pizza box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346178.jpg", "caption": "a baseball player running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084046.jpg", "caption": "a man playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215167.jpg", "caption": "a computer desk with a keyboard, mouse, and monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084103.jpg", "caption": "a blue and white train sitting on the tracks", "annotations": [{"polygon": [[317, 247], [329, 238], [352, 238], [345, 270], [336, 280], [324, 269], [315, 269]], "text": "alex", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "兽", "recog_valid": false, "glyph_recog_text": "xeje", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084114.jpg", "caption": "a group of young baseball players standing on a field", "annotations": [{"polygon": [[290, 83], [292, 114], [214, 118], [213, 87]], "text": "one", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "pne", "recog_valid": false, "glyph_recog_text": "one", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[289, 62], [290, 102], [387, 99], [385, 68]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Snc", "recog_valid": false, "glyph_recog_text": "C", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477335.jpg", "caption": "a person is flying a kite on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346266.jpg", "caption": "a yellow fishing boat is docked at a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084129.jpg", "caption": "a man holding a sign that says white power", "annotations": [{"polygon": [[253, 127], [315, 136], [311, 157], [280, 155], [267, 160], [249, 156], [249, 138]], "text": "what?", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wha", "recog_valid": false, "glyph_recog_text": "what?", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[248, 168], [266, 161], [266, 161], [315, 165], [354, 169], [357, 198], [344, 240], [324, 246], [240, 246]], "text": "NO!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": false, "glyph_recog_text": "NO!", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477351.jpg", "caption": "two women in purple outfits are riding on an elephant", "annotations": [{"polygon": [[222, 145], [227, 171], [244, 172], [264, 164], [347, 180], [364, 188], [374, 160], [370, 152], [331, 152], [301, 149], [274, 141], [253, 140], [237, 137], [223, 139]], "text": "BARNUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BARNU", "recog_valid": false, "glyph_recog_text": "BARNUM", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[246, 171], [252, 177], [252, 193], [267, 196], [282, 194], [294, 193], [317, 208], [360, 205], [365, 189], [346, 183], [303, 178], [292, 173], [276, 170], [266, 163], [257, 163]], "text": "BARNUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "B", "recog_valid": false, "glyph_recog_text": "BARNUM", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215215.jpg", "caption": "a stop sign on a pole", "annotations": [{"polygon": [[278, 182], [282, 281], [53, 300], [55, 201]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346314.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[229, 226], [245, 220], [245, 220], [257, 212], [264, 228], [253, 239], [237, 245]], "text": "52", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "52", "recog_valid": true, "glyph_recog_text": "52", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084183.jpg", "caption": "a man talking on a cell phone while standing on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346341.jpg", "caption": "a clock tower on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477417.jpg", "caption": "a traffic light with a clock tower in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084211.jpg", "caption": "a group of women standing in front of a cake", "annotations": [{"polygon": [[204, 339], [255, 322], [270, 333], [221, 354]], "text": "READ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "READ", "recog_valid": true, "glyph_recog_text": "READ", "glyph_recog_ld": 1.0}, {"polygon": [[335, 320], [344, 312], [405, 333], [400, 343]], "text": "Maltree", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "uai.rps", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477428.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[329, 354], [322, 376], [362, 378], [372, 381], [392, 374], [445, 372], [461, 356], [461, 350], [446, 346], [442, 347], [439, 352]], "text": "esport", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "esport", "recog_valid": true, "glyph_recog_text": "esport", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477440.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346366.jpg", "caption": "a large group of sheep standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215304.jpg", "caption": "a red and white train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477451.jpg", "caption": "a young boy in a baseball uniform is about to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346384.jpg", "caption": "a bus and a car driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477459.jpg", "caption": "a narrow alley with a bicycle parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346407.jpg", "caption": "three boys playing soccer in front of the brooklyn bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346433.jpg", "caption": "a young boy sitting in front of a pile of vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084314.jpg", "caption": "a display case filled with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346458.jpg", "caption": "a person skiing down a snowy mountain side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346469.jpg", "caption": "a skateboarder in the air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215398.jpg", "caption": "a television mounted on a wall above a fish tank", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346472.jpg", "caption": "a view of a city street at night from a balcony", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477563.jpg", "caption": "a man and woman on a motorcycle on a dock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477580.jpg", "caption": "a table with a bottle of wine, vegetables, and a bottle of juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215436.jpg", "caption": "a man sitting on a yellow fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477585.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084386.jpg", "caption": "a young man sitting on a brick wall with a skateboard", "annotations": [{"polygon": [[175, 226], [292, 222], [297, 249], [278, 276], [174, 278]], "text": "RVC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PVOO", "recog_valid": false, "glyph_recog_text": "RVC", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[202, 17], [202, 61], [234, 62], [231, 15]], "text": "Sex", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "co o", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346544.jpg", "caption": "two police cars parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477617.jpg", "caption": "a sandwich with meat and pickles on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215488.jpg", "caption": "a large black truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477636.jpg", "caption": "two small planes parked on the shore of a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477643.jpg", "caption": "a laptop computer sitting on a table", "annotations": [{"polygon": [[13, 230], [18, 239], [20, 240], [86, 197], [79, 186], [16, 226]], "text": "paranormal", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "paranormal", "recog_valid": true, "glyph_recog_text": "paranormal", "glyph_recog_ld": 1.0}, {"polygon": [[20, 241], [28, 252], [89, 216], [91, 221], [94, 216], [92, 205], [81, 207]], "text": "interactivity", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "interactivity", "recog_valid": true, "glyph_recog_text": "interactivity", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215502.jpg", "caption": "a red and white airplane flying through a cloudy sky", "annotations": [{"polygon": [[201, 336], [189, 365], [171, 357], [174, 350], [193, 331]], "text": "Classic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oHTDY", "recog_valid": false, "glyph_recog_text": "S2E5R1K", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215511.jpg", "caption": "a baseball player throwing a pitch on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215561.jpg", "caption": "a girl wearing a red tie standing by a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477719.jpg", "caption": "a large number of toilets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477724.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346653.jpg", "caption": "a man on a motorcycle riding down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346668.jpg", "caption": "a woman and a child standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477749.jpg", "caption": "a man is standing in the front of a green and red train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215612.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[180, 138], [212, 106], [230, 110], [248, 122], [258, 139], [260, 155], [262, 180], [231, 201], [228, 180], [221, 157], [209, 140], [195, 136]], "text": "LENZUELA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CBZUELA", "recog_valid": false, "glyph_recog_text": "LENZUELA", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[127, 211], [188, 141], [218, 167], [165, 232]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1 5", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215618.jpg", "caption": "a woman walking a dog in the rain with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084551.jpg", "caption": "a street sign with many different signs on it", "annotations": [{"polygon": [[135, 181], [175, 193], [185, 217], [146, 207]], "text": "EUROPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "日MR", "recog_valid": false, "glyph_recog_text": "EUROPE", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477800.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215664.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346754.jpg", "caption": "a kitchen with a large island and wooden floors", "annotations": [{"polygon": [[366, 391], [366, 421], [505, 422], [505, 393]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Photography", "recog_valid": true, "glyph_recog_text": "Photography", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215706.jpg", "caption": "a man holding a cell phone with a picture of a movie on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477852.jpg", "caption": "a motorcycle racer is racing down a track", "annotations": [{"polygon": [[227, 263], [236, 283], [257, 266], [243, 250]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[337, 300], [344, 317], [370, 289], [359, 278]], "text": "ONGETT'A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONCETA", "recog_valid": false, "glyph_recog_text": "COENETPA", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215738.jpg", "caption": "a group of people riding on motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477882.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[86, 287], [164, 286], [162, 371], [85, 368]], "text": "RO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "RO", "recog_valid": true, "glyph_recog_text": "R", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346849.jpg", "caption": "a man in uniform riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346915.jpg", "caption": "two women standing at a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215847.jpg", "caption": "a black and white photo of a cake with a knife", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215858.jpg", "caption": "a young child on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215878.jpg", "caption": "a microwave oven sitting on a counter with a christmas stocking hanging on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478031.jpg", "caption": "an all nippon air boeing 767-300", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478032.jpg", "caption": "a model of a highway with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215908.jpg", "caption": "a cell phone, a wallet, a key chain and a cell phone", "annotations": [{"polygon": [[436, 348], [425, 359], [400, 343], [401, 329]], "text": "Weekly", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Weekly", "recog_valid": true, "glyph_recog_text": "Weekty", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[401, 329], [396, 341], [356, 318], [362, 308]], "text": "MacBreak", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MacBreal", "recog_valid": false, "glyph_recog_text": "tMacBosak", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478052.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478067.jpg", "caption": "a painting of a horse and carriage on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478087.jpg", "caption": "a tablet with a keyboard and a spreadsheet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347021.jpg", "caption": "a cat is sitting on a blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347042.jpg", "caption": "a view of a city street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347055.jpg", "caption": "a man is on a treadmill watching a tennis match", "annotations": [{"polygon": [[344, 328], [390, 328], [391, 360], [343, 360]], "text": "209", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "209", "recog_valid": true, "glyph_recog_text": "209", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478132.jpg", "caption": "two bicycles parked next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478145.jpg", "caption": "people are walking down the stairs to the train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216003.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[285, 225], [300, 239], [280, 266], [265, 250]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一2", "recog_valid": false, "glyph_recog_text": "11", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216015.jpg", "caption": "a fire truck is spraying water on a building at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347101.jpg", "caption": "a woman crossing a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216042.jpg", "caption": "a food truck parked at night with a sign has texts", "annotations": [{"polygon": [[0, 127], [97, 146], [89, 197], [0, 181]], "text": "NOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NOR", "recog_valid": true, "glyph_recog_text": "NOR", "glyph_recog_ld": 1.0}, {"polygon": [[123, 204], [134, 118], [227, 139], [213, 212]], "text": "Sisio", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ssre", "recog_valid": false, "glyph_recog_text": "Sisio", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347121.jpg", "caption": "a person holding a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084981.jpg", "caption": "a bicycle parked on the sidewalk next to a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347133.jpg", "caption": "three baseball players standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347135.jpg", "caption": "a green bus driving down the street", "annotations": [{"polygon": [[344, 77], [350, 76], [404, 66], [407, 71], [405, 76], [407, 89], [345, 98], [343, 95], [343, 93], [345, 91], [343, 86], [342, 81]], "text": "SUPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SUPER", "recog_valid": true, "glyph_recog_text": "SUPER", "glyph_recog_ld": 1.0}, {"polygon": [[341, 104], [345, 100], [354, 100], [381, 95], [405, 91], [403, 125], [348, 132], [342, 130], [339, 128], [339, 123], [341, 121], [344, 119], [343, 117], [340, 114], [340, 112]], "text": "SALE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SALE", "recog_valid": true, "glyph_recog_text": "SALE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216142.jpg", "caption": "a yellow bus parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216150.jpg", "caption": "a woman walking down the street with a basket on her head", "annotations": [{"polygon": [[25, 153], [58, 166], [59, 148], [27, 132]], "text": "NO PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "theuespa", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216158.jpg", "caption": "a large truck driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347235.jpg", "caption": "a street sign and a traffic light on a pole", "annotations": [{"polygon": [[142, 253], [240, 248], [236, 274], [142, 280]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROADWAY", "recog_valid": true, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085110.jpg", "caption": "a group of young men sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347292.jpg", "caption": "a man in a red suit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478368.jpg", "caption": "a pan with six mini pizzas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478391.jpg", "caption": "maple street book shop", "annotations": [{"polygon": [[292, 205], [315, 200], [332, 199], [361, 200], [374, 203], [402, 206], [404, 232], [371, 229], [329, 225], [300, 229]], "text": "MAPLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAPLE", "recog_valid": true, "glyph_recog_text": "MAPLE", "glyph_recog_ld": 1.0}, {"polygon": [[310, 262], [377, 263], [433, 264], [434, 232], [388, 234], [368, 232], [339, 228], [314, 230]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478403.jpg", "caption": "a plane taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347340.jpg", "caption": "a woman is petting a cow in an indoor arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347346.jpg", "caption": "an old man sitting on a bench reading a book", "annotations": [{"polygon": [[220, 403], [204, 408], [202, 419], [205, 443], [216, 441], [233, 434], [262, 434], [275, 413], [242, 405]], "text": "good", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SO", "recog_valid": false, "glyph_recog_text": "good", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[290, 408], [283, 412], [277, 430], [280, 435], [292, 433], [341, 433], [357, 411], [321, 406], [302, 403]], "text": "Book s", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "9Booh", "recog_valid": false, "glyph_recog_text": "Book s", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216278.jpg", "caption": "a double decker bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347362.jpg", "caption": "a man eating a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085225.jpg", "caption": "a city street at night with buildings and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216301.jpg", "caption": "a train is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216319.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085252.jpg", "caption": "two men riding on a jet ski in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347405.jpg", "caption": "avocado toast with orange jam", "annotations": [{"polygon": [[16, 266], [72, 316], [88, 301], [31, 254]], "text": "CHOCOLAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IVTOOOUO", "recog_valid": false, "glyph_recog_text": "CHOOOLAT", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[30, 137], [39, 145], [100, 91], [89, 85]], "text": "espress", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "espress", "recog_valid": true, "glyph_recog_text": "eapcraa", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478485.jpg", "caption": "a man is working on a ladder in a large building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347419.jpg", "caption": "a group of children and adults posing for a photo with tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478499.jpg", "caption": "a plate of corned beef and pickles on a table", "annotations": [{"polygon": [[441, 85], [426, 129], [424, 142], [419, 169], [417, 192], [423, 204], [434, 209], [448, 208], [454, 202], [456, 195], [474, 90], [474, 90]], "text": "Coke", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ao0", "recog_valid": false, "glyph_recog_text": "0o±", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[463, 145], [472, 149], [485, 145], [494, 93], [482, 89], [476, 91], [464, 121]], "text": "Diete", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "32G", "recog_valid": false, "glyph_recog_text": "a.", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085291.jpg", "caption": "a fighter jet taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347437.jpg", "caption": "three men in wet suits standing on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085302.jpg", "caption": "a clock with ornate carvings on the face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216393.jpg", "caption": "a train is pulling into a station with a red and blue train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085322.jpg", "caption": "a person holding a silver foil wrapped gift", "annotations": [{"polygon": [[131, 255], [143, 237], [184, 269], [172, 288]], "text": "Linda", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Linda", "recog_valid": true, "glyph_recog_text": "Linda", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347495.jpg", "caption": "a man on a snowboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216432.jpg", "caption": "a slice of pizza on a plate", "annotations": [{"polygon": [[194, 142], [196, 171], [229, 176], [228, 147]], "text": "BIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIG", "recog_valid": true, "glyph_recog_text": "BIG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478576.jpg", "caption": "a teddy bear sitting on a table with a bowl of pudding", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478582.jpg", "caption": "a man handing out food to people on a street", "annotations": [{"polygon": [[0, 308], [0, 321], [28, 311], [41, 307], [56, 298], [70, 293], [67, 284], [48, 291], [39, 294]], "text": "Region", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CONIORY", "recog_valid": false, "glyph_recog_text": "tegian", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[125, 246], [126, 260], [134, 274], [149, 270], [164, 262], [166, 259], [158, 239], [152, 239], [139, 246]], "text": "LOUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "nd", "recog_valid": false, "glyph_recog_text": "LOUE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478597.jpg", "caption": "a display case with several pizzas", "annotations": [{"polygon": [[136, 329], [136, 328], [143, 322], [166, 315], [186, 309], [193, 310], [198, 321], [197, 328], [191, 335], [185, 337], [177, 337], [168, 342], [164, 345], [159, 346], [148, 350], [148, 350], [140, 351], [137, 346]], "text": "3.20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "320", "recog_valid": false, "glyph_recog_text": "3.20", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085383.jpg", "caption": "a person on a dirt bike in the air at night", "annotations": [{"polygon": [[339, 235], [351, 248], [370, 224], [358, 211]], "text": "125", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "125", "recog_valid": true, "glyph_recog_text": "125", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085404.jpg", "caption": "a bunch of bananas and a cell phone on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478646.jpg", "caption": "a person holding a remote control in front of a tv", "annotations": [{"polygon": [[283, 392], [285, 400], [332, 379], [328, 370]], "text": "SONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SONY", "recog_valid": true, "glyph_recog_text": "SONY", "glyph_recog_ld": 1.0}, {"polygon": [[36, 394], [41, 402], [80, 364], [78, 352]], "text": "SON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SON", "recog_valid": true, "glyph_recog_text": "收防", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347587.jpg", "caption": "a room with chairs and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347604.jpg", "caption": "two pictures of a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216570.jpg", "caption": "a plane flying in the sky", "annotations": [{"polygon": [[124, 164], [155, 205], [161, 199], [129, 161], [123, 164]], "text": "easyJ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "easy.", "recog_valid": false, "glyph_recog_text": "e...a", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[207, 228], [212, 240], [250, 249], [255, 257], [283, 257], [288, 252], [289, 250], [207, 227]], "text": "easy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "noaoyoy", "recog_valid": false, "glyph_recog_text": "e aay", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216571.jpg", "caption": "two jockeys racing horses on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347649.jpg", "caption": "a train on the tracks at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216579.jpg", "caption": "a garbage truck parked next to a garbage can", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478726.jpg", "caption": "a chocolate birthday cake with candles on it", "annotations": [{"polygon": [[156, 156], [156, 156], [190, 159], [274, 166], [272, 203], [162, 184]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "nars", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[92, 194], [122, 202], [156, 207], [191, 205], [227, 200], [238, 201], [252, 230], [225, 237], [194, 241], [164, 246], [133, 243], [111, 235], [100, 232], [95, 229]], "text": "RTHDA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAPA", "recog_valid": false, "glyph_recog_text": "RTHDA", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347655.jpg", "caption": "a woman is putting a toy car together on a table", "annotations": [{"polygon": [[337, 307], [349, 333], [390, 315], [420, 307], [414, 286], [388, 283], [361, 289]], "text": "Lite", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lie", "recog_valid": false, "glyph_recog_text": "Lite", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478778.jpg", "caption": "a desk with a computer and a laptop on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478783.jpg", "caption": "a large jet airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216663.jpg", "caption": "two women playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216685.jpg", "caption": "a sandwich and salad on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478859.jpg", "caption": "a bedroom with a bed, desk and pictures on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478880.jpg", "caption": "two planes flying in the sky", "annotations": [{"polygon": [[330, 210], [349, 239], [361, 239], [342, 209]], "text": "EAGLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EAJLE", "recog_valid": false, "glyph_recog_text": "EAVORE", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[274, 289], [293, 321], [306, 320], [285, 288]], "text": "EAGLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EABLE", "recog_valid": false, "glyph_recog_text": "EAOLE", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216742.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478892.jpg", "caption": "a table with a yellow tablecloth and chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478899.jpg", "caption": "a man on a snowboard going down a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216763.jpg", "caption": "a boy on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216778.jpg", "caption": "a man riding a skateboard at a skate park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478962.jpg", "caption": "a dog wearing a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216825.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216827.jpg", "caption": "photograph the carriage by james mccormick on 500px", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478982.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[124, 258], [125, 320], [155, 322], [190, 322], [190, 273], [188, 265]], "text": "MT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MT", "recog_valid": true, "glyph_recog_text": "MT", "glyph_recog_ld": 1.0}, {"polygon": [[222, 270], [229, 321], [258, 323], [279, 328], [295, 327], [326, 294], [336, 286], [333, 281], [268, 267], [242, 265], [221, 265]], "text": "HOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOPR", "recog_valid": false, "glyph_recog_text": "HOP", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[341, 313], [348, 352], [354, 351], [364, 344], [379, 329], [380, 310], [372, 292], [369, 290]], "text": "RD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RD", "recog_valid": true, "glyph_recog_text": "RD", "glyph_recog_ld": 1.0}, {"polygon": [[322, 310], [336, 385], [316, 396], [295, 410], [280, 419], [254, 435], [245, 447], [228, 448], [221, 448], [224, 434], [230, 412], [242, 397], [252, 385], [258, 366], [277, 347]], "text": "ORN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TMORN", "recog_valid": false, "glyph_recog_text": "ORN", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216840.jpg", "caption": "a group of motorcycles parked on a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216851.jpg", "caption": "a little girl eating a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347933.jpg", "caption": "caramel apple crumb pie", "annotations": [{"polygon": [[101, 442], [101, 442], [104, 450], [110, 456], [117, 456], [202, 456], [206, 451], [200, 453], [199, 445], [203, 437], [203, 428], [203, 425], [197, 430], [193, 439], [158, 438], [142, 438], [132, 439], [121, 437], [119, 428], [117, 423], [112, 423], [107, 427]], "text": "Caramel", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Carantel", "recog_valid": false, "glyph_recog_text": "Caramel", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479038.jpg", "caption": "a large bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347972.jpg", "caption": "a blue train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479045.jpg", "caption": "a street with a clock on it at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479068.jpg", "caption": "people walking and riding bicycles in an outdoor market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348019.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[213, 164], [328, 182], [335, 187], [336, 195], [336, 204], [333, 211], [330, 213], [318, 214], [317, 239], [215, 232], [208, 229], [202, 223], [200, 217], [199, 211], [208, 212], [208, 216], [212, 221], [220, 222], [223, 219], [223, 212], [220, 203], [212, 196], [203, 186], [202, 177], [202, 170], [207, 166]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[204, 234], [225, 268], [312, 263], [307, 234]], "text": "volde", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Voloo", "recog_valid": false, "glyph_recog_text": "volde", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348027.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479103.jpg", "caption": "a boat is traveling through the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216963.jpg", "caption": "a brown teddy bear hanging from a wooden pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348047.jpg", "caption": "two people on skis and one person on a wheelchair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479172.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000085960.jpg", "caption": "a display case with two boats and a sail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348116.jpg", "caption": "two people skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217046.jpg", "caption": "two wooden ships are on display in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479198.jpg", "caption": "a fruit stand with oranges, mangoes and limes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479219.jpg", "caption": "a motorcycle is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348157.jpg", "caption": "a woman in a baseball uniform sitting on a bench", "annotations": [{"polygon": [[206, 228], [235, 220], [239, 226], [242, 254], [213, 266]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479234.jpg", "caption": "a golden clock is sitting in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217091.jpg", "caption": "a bicycle parked on a bench in front of a hill", "annotations": [{"polygon": [[210, 238], [241, 276], [248, 275], [222, 239], [217, 236]], "text": "GIANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANT", "recog_valid": false, "glyph_recog_text": "pranr", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217094.jpg", "caption": "a person on a dirt bike in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217103.jpg", "caption": "a stop sign with a red flag on it", "annotations": [{"polygon": [[139, 323], [215, 341], [217, 321], [223, 312], [221, 296], [141, 271]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217118.jpg", "caption": "a bunch of street signs hanging from a wire", "annotations": [{"polygon": [[127, 310], [227, 307], [226, 256], [128, 257]], "text": "STATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STATE", "recog_valid": true, "glyph_recog_text": "STATE", "glyph_recog_ld": 1.0}, {"polygon": [[347, 121], [346, 152], [394, 153], [392, 124]], "text": "35TH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35元H", "recog_valid": false, "glyph_recog_text": "35TH", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[350, 330], [349, 365], [427, 366], [428, 331]], "text": "NORTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "NORTH", "recog_valid": true, "glyph_recog_text": "NORTH", "glyph_recog_ld": 1.0}, {"polygon": [[140, 326], [140, 374], [231, 380], [231, 326]], "text": "MADISON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MADISON", "recog_valid": true, "glyph_recog_text": "MADISON", "glyph_recog_ld": 1.0}, {"polygon": [[32, 227], [30, 259], [34, 249], [122, 249], [120, 223]], "text": "ALTA VISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AITA VISTA", "recog_valid": false, "glyph_recog_text": "ALTA VISTA", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[11, 157], [15, 170], [33, 169], [34, 187], [96, 188], [96, 158]], "text": "WESTERN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MESTERN", "recog_valid": false, "glyph_recog_text": "WESTERN", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479263.jpg", "caption": "a sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479272.jpg", "caption": "a woman talking on a cell phone in a park", "annotations": [{"polygon": [[207, 327], [233, 335], [226, 398], [199, 390]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348202.jpg", "caption": "a young man on a skateboard in a skate park", "annotations": [{"polygon": [[280, 148], [295, 142], [307, 143], [321, 150], [333, 160], [341, 134], [323, 125], [303, 120], [285, 119], [279, 142]], "text": "RVCA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8Y60", "recog_valid": false, "glyph_recog_text": "RVCA", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217141.jpg", "caption": "a sign has texts on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348217.jpg", "caption": "a man in white playing tennis on a court", "annotations": [{"polygon": [[2, 238], [509, 244], [510, 311], [2, 297]], "text": "AMPINSHIPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AMPLNSHIPS", "recog_valid": false, "glyph_recog_text": "AMPINSHIPS", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[271, 348], [270, 397], [1, 384], [2, 340]], "text": "SADIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TSAS", "recog_valid": false, "glyph_recog_text": "SADIS", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[432, 166], [441, 188], [463, 179], [454, 154]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "三", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217165.jpg", "caption": "a group of black horses pulling a carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348269.jpg", "caption": "a woman sitting at a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479361.jpg", "caption": "people walking on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217226.jpg", "caption": "a yellow and black train at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217238.jpg", "caption": "a blender sitting on a counter next to a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479394.jpg", "caption": "a toilet with blue and white porcelain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217276.jpg", "caption": "three boys sitting on a bench", "annotations": [{"polygon": [[235, 252], [230, 284], [339, 293], [321, 259]], "text": "FILA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIUS", "recog_valid": false, "glyph_recog_text": "FILA", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479427.jpg", "caption": "a blurry image of a moving train at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217290.jpg", "caption": "a row of buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217293.jpg", "caption": "a man holding a baseball bat in a park", "annotations": [{"polygon": [[210, 262], [247, 260], [239, 303], [206, 302]], "text": "B", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "m", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348381.jpg", "caption": "a man in a kitchen with a cake and a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086243.jpg", "caption": "a little girl holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217327.jpg", "caption": "a woman standing on a street corner at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086292.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217376.jpg", "caption": "a man is working on a trench with a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217379.jpg", "caption": "a street sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348467.jpg", "caption": "a man standing in a bathroom with a shower head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348480.jpg", "caption": "a living room with a christmas tree and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217407.jpg", "caption": "a flower in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479550.jpg", "caption": "a woman in a flower crown riding a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479560.jpg", "caption": "a man in a blue shirt and black shorts is about to hit a tennis ball", "annotations": [{"polygon": [[100, 238], [89, 268], [115, 273], [134, 272], [152, 274], [159, 255], [161, 247], [155, 240], [124, 238]], "text": "aeg", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AEG", "recog_valid": false, "glyph_recog_text": "aeg", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348510.jpg", "caption": "a train traveling down a road with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479591.jpg", "caption": "a street with a car driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217461.jpg", "caption": "two girls sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479611.jpg", "caption": "a white horse standing in a fenced in area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086407.jpg", "caption": "a man in a black shirt smiling", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217484.jpg", "caption": "a silver train on the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479658.jpg", "caption": "a group of people standing around a refrigerator", "annotations": [{"polygon": [[84, 273], [163, 290], [172, 287], [196, 276], [195, 274], [154, 267], [95, 259], [89, 260]], "text": "GC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZLT", "recog_valid": false, "glyph_recog_text": "GC", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348595.jpg", "caption": "a man with a face painted with polish flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348604.jpg", "caption": "a fighter jet taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479680.jpg", "caption": "a man wearing an orange shirt is riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348609.jpg", "caption": "a group of people standing in the snow with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479687.jpg", "caption": "a group of people sitting on a bench", "annotations": [{"polygon": [[137, 138], [142, 136], [145, 143], [189, 129], [193, 137], [197, 147], [198, 147], [149, 168], [140, 168], [130, 156], [131, 145]], "text": "Camp", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "Camp", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479707.jpg", "caption": "a group of people sitting at a table in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348637.jpg", "caption": "chapman strait's christmas cake", "annotations": [{"polygon": [[168, 230], [184, 227], [190, 240], [200, 235], [202, 231], [202, 229], [222, 229], [222, 250], [224, 254], [227, 252], [228, 254], [226, 258], [223, 259], [219, 256], [199, 257], [183, 256], [168, 257], [167, 251]], "text": "We'll", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ne'll", "recog_valid": false, "glyph_recog_text": "We'll", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[236, 236], [238, 235], [243, 236], [246, 236], [249, 236], [253, 249], [254, 252], [256, 237], [253, 232], [253, 228], [256, 227], [259, 229], [260, 234], [259, 237], [267, 235], [273, 243], [273, 252], [275, 253], [276, 245], [276, 239], [276, 236], [281, 233], [284, 239], [286, 246], [286, 251], [285, 256], [282, 258], [275, 256], [270, 257], [265, 256], [260, 254], [259, 254], [256, 256], [250, 256], [244, 253], [239, 257], [238, 253], [238, 246], [238, 244], [234, 240], [233, 239]], "text": "Miss", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "miao", "recog_valid": false, "glyph_recog_text": "Miss", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[304, 236], [341, 234], [341, 243], [342, 248], [344, 248], [348, 246], [349, 249], [346, 254], [340, 253], [339, 247], [335, 253], [317, 254], [316, 264], [314, 271], [306, 272], [301, 268], [300, 264], [302, 259], [306, 258], [308, 259], [308, 262], [306, 263], [305, 264], [307, 266], [311, 265], [313, 261], [315, 255], [314, 250], [310, 253], [306, 253], [303, 248], [303, 241]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "you", "recog_valid": true, "glyph_recog_text": "you", "glyph_recog_ld": 1.0}, {"polygon": [[168, 279], [176, 279], [177, 284], [182, 283], [183, 278], [185, 278], [186, 297], [186, 301], [190, 294], [195, 296], [196, 303], [199, 297], [202, 296], [206, 297], [209, 303], [209, 307], [211, 300], [212, 294], [215, 294], [215, 297], [220, 297], [222, 298], [222, 280], [223, 278], [229, 278], [229, 290], [228, 306], [229, 309], [233, 302], [237, 296], [238, 293], [242, 294], [244, 295], [246, 295], [246, 290], [246, 287], [248, 285], [252, 286], [253, 289], [250, 293], [249, 300], [249, 308], [250, 308], [252, 304], [253, 297], [256, 294], [258, 294], [262, 295], [264, 296], [265, 303], [268, 313], [272, 318], [267, 318], [260, 313], [215, 313], [214, 333], [212, 335], [207, 332], [209, 314], [204, 313], [182, 313], [179, 310], [174, 313], [168, 313], [165, 304], [162, 294], [163, 287], [165, 282]], "text": "CHaplain", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Chaplain", "recog_valid": false, "glyph_recog_text": "CHaplair", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[288, 273], [294, 276], [299, 276], [301, 278], [301, 282], [306, 284], [308, 293], [314, 294], [313, 288], [313, 285], [317, 283], [321, 286], [320, 290], [320, 293], [320, 299], [320, 305], [323, 300], [325, 292], [328, 291], [331, 293], [334, 292], [349, 291], [368, 293], [370, 303], [370, 307], [371, 311], [371, 311], [365, 311], [364, 310], [359, 311], [357, 309], [352, 313], [351, 323], [350, 327], [347, 331], [340, 331], [340, 325], [342, 317], [348, 311], [348, 310], [343, 308], [339, 313], [329, 311], [312, 314], [301, 313], [285, 313], [276, 307], [274, 300], [275, 295], [279, 294], [280, 298], [279, 301], [280, 304], [283, 308], [287, 309], [289, 305], [288, 299], [283, 288], [280, 280], [282, 272]], "text": "Stringer", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sthimga", "recog_valid": false, "glyph_recog_text": "Stringe", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217574.jpg", "caption": "a white scooter parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086524.jpg", "caption": "three people skateboarding in front of the eiffel tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217596.jpg", "caption": "a fire hydrant sitting in front of a building with graffiti on it", "annotations": [{"polygon": [[430, 167], [430, 199], [498, 201], [493, 176]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[358, 80], [358, 80], [423, 82], [421, 54], [359, 50]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[324, 153], [388, 158], [387, 127], [325, 123]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[390, 254], [453, 257], [452, 227], [390, 224]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[284, 76], [334, 76], [336, 97], [287, 106]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Xu", "recog_valid": false, "glyph_recog_text": "you", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[445, 60], [446, 92], [511, 96], [511, 66]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "tikc", "recog_valid": false, "glyph_recog_text": "like", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[393, 110], [393, 140], [458, 138], [452, 114]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[396, 141], [400, 172], [449, 162], [449, 141]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "You", "recog_valid": false, "glyph_recog_text": "you", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[296, 232], [300, 262], [349, 256], [348, 237]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "You", "recog_valid": false, "glyph_recog_text": "YOU", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[294, 200], [295, 231], [356, 235], [358, 206]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[449, 258], [450, 287], [511, 291], [513, 263]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[408, 320], [407, 351], [471, 356], [472, 332]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "like", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[409, 353], [411, 384], [462, 378], [462, 357]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "764", "recog_valid": false, "glyph_recog_text": "YOU", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[340, 367], [343, 397], [394, 391], [392, 371]], "text": "You", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "You", "recog_valid": true, "glyph_recog_text": "You", "glyph_recog_ld": 1.0}, {"polygon": [[336, 365], [401, 369], [399, 336], [338, 335]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[310, 311], [310, 311], [312, 342], [361, 334], [359, 315]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "You", "recog_valid": false, "glyph_recog_text": "you", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[310, 277], [308, 309], [373, 311], [366, 287]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[172, 316], [171, 348], [234, 349], [231, 321]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[3, 312], [66, 322], [65, 348], [3, 342]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "like", "recog_valid": true, "glyph_recog_text": "like", "glyph_recog_ld": 1.0}, {"polygon": [[41, 286], [42, 316], [84, 312], [84, 291]], "text": "You", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "zc", "recog_valid": false, "glyph_recog_text": "You", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[38, 254], [37, 285], [85, 288], [83, 259]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "王", "recog_valid": false}, {"polygon": [[48, 175], [50, 208], [85, 209], [83, 178]], "text": "lik", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "lik", "recog_valid": true}, {"polygon": [[53, 209], [55, 240], [84, 237], [85, 209]], "text": "yo", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Yo", "recog_valid": false}, {"polygon": [[-2, 203], [51, 209], [51, 235], [0, 233]], "text": "ikw", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "iKe", "recog_valid": false}, {"polygon": [[10, 51], [11, 82], [76, 82], [77, 51]], "text": "like", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "like", "recog_valid": true}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348675.jpg", "caption": "a large wooden grandfather clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348680.jpg", "caption": "a man and woman standing on a dock with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479769.jpg", "caption": "a ski lift in the fog with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086573.jpg", "caption": "two plates with food and wine on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348722.jpg", "caption": "a motorcycle is parked in front of a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217653.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217676.jpg", "caption": "a police officer standing next to a parked car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479825.jpg", "caption": "1947 p-51 mustang", "annotations": [{"polygon": [[230, 274], [230, 274], [273, 285], [267, 311], [223, 301]], "text": "CM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CM", "recog_valid": true, "glyph_recog_text": "CM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348759.jpg", "caption": "a clock on a tall building in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217707.jpg", "caption": "a laptop and a desktop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348805.jpg", "caption": "a man is standing next to a horse with a bridle on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348809.jpg", "caption": "a woman eating a donut", "annotations": [{"polygon": [[252, 434], [337, 428], [339, 451], [296, 458], [251, 456]], "text": "SCARLET", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Scarlet", "recog_valid": false, "glyph_recog_text": "SCARLET", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348826.jpg", "caption": "a large airplane parked on a wet runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479899.jpg", "caption": "a small airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217769.jpg", "caption": "two fire trucks parked in a large building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086715.jpg", "caption": "two men playing frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348874.jpg", "caption": "a stop sign and a traffic light on a street", "annotations": [{"polygon": [[160, 155], [160, 155], [250, 154], [249, 190], [159, 188]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[255, 333], [253, 380], [376, 380], [376, 340], [289, 337]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086750.jpg", "caption": "a woman and a man sitting at a table with a large pizza", "annotations": [{"polygon": [[439, 75], [438, 101], [496, 84], [494, 61], [472, 64]], "text": "BSSFDLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "BSSEDLE", "recog_valid": false, "glyph_recog_text": "BSSFOLE", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217822.jpg", "caption": "two pictures of a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217864.jpg", "caption": "a street sign and a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086797.jpg", "caption": "an old white refrigerator sitting in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348950.jpg", "caption": "a person holding a wii remote", "annotations": [{"polygon": [[415, 164], [437, 193], [447, 177], [432, 153]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "!!M", "recog_valid": false, "glyph_recog_text": "Wil", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348968.jpg", "caption": "two men standing next to a moose cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480059.jpg", "caption": "a man holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217925.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217929.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[314, 347], [329, 352], [337, 342], [349, 336], [367, 334], [377, 332], [391, 336], [392, 330], [378, 324], [370, 322], [347, 321], [330, 327]], "text": "SCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SCH", "recog_valid": true, "glyph_recog_text": "SCH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217959.jpg", "caption": "a cow is standing in a pen with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349038.jpg", "caption": "a plane is parked at an airport", "annotations": [{"polygon": [[221, 286], [228, 291], [189, 339], [182, 334]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOUTHWEST", "recog_valid": true, "glyph_recog_text": "", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349044.jpg", "caption": "a counter with a blender, a blender, and a coffee maker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349047.jpg", "caption": "a parking lot with a lot of motorcycles parked in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349050.jpg", "caption": "a man sitting at a table with two plates of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349059.jpg", "caption": "a bathroom sink with toothpaste and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349074.jpg", "caption": "a white bus parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349083.jpg", "caption": "a street sign with the words madison and main st", "annotations": [{"polygon": [[146, 314], [146, 314], [145, 376], [307, 374], [307, 310]], "text": "MAIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAIN", "recog_valid": true, "glyph_recog_text": "MAIN", "glyph_recog_ld": 1.0}, {"polygon": [[348, 314], [351, 372], [419, 375], [429, 323], [425, 313], [359, 312]], "text": "ST.", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": false, "glyph_recog_text": "ST.", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[155, 131], [154, 187], [265, 230], [289, 240], [315, 252], [333, 261], [342, 267], [339, 215], [328, 204], [309, 194], [279, 182], [259, 172], [227, 160], [201, 150]], "text": "MADISONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MADISON", "recog_valid": false, "glyph_recog_text": "MADISONS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[350, 222], [352, 277], [397, 285], [403, 233]], "text": "ST.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": false, "glyph_recog_text": "ST.", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480172.jpg", "caption": "a steam train traveling through a lush green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480196.jpg", "caption": "a group of people playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349133.jpg", "caption": "a slice of pizza on a plate", "annotations": [{"polygon": [[153, 412], [503, 367], [494, 384], [335, 411], [190, 429], [172, 432]], "text": "VEGAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "VEGAINO SOIN", "recog_valid": false, "glyph_recog_text": "V E G A N", "glyph_recog_ld": 0.25000062499947917}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218099.jpg", "caption": "a man laying on a bed", "annotations": [{"polygon": [[305, 161], [289, 187], [231, 156], [212, 124], [230, 97], [279, 119]], "text": "Emer", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bmet", "recog_valid": false, "glyph_recog_text": "Emer", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[164, 372], [203, 401], [188, 419], [151, 395]], "text": "X", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "H", "recog_valid": false, "glyph_recog_text": "X", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218116.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087058.jpg", "caption": "a large blue airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349230.jpg", "caption": "a white plate with a pastry on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218158.jpg", "caption": "two men standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087113.jpg", "caption": "a man is standing on a sidewalk talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349271.jpg", "caption": "a train is pulling a train car on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218201.jpg", "caption": "people are sitting in a subway train has texts on the doors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218223.jpg", "caption": "a woman holding a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480368.jpg", "caption": "a man in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349318.jpg", "caption": "an american airlines airplane is flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480415.jpg", "caption": "a large jetliner on the runway at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218274.jpg", "caption": "a large teddy bear sitting on top of a float", "annotations": [{"polygon": [[58, 449], [66, 353], [344, 332], [351, 450]], "text": "TOYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "88", "recog_valid": false, "glyph_recog_text": "TOYS", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[386, 328], [511, 328], [513, 448], [388, 451]], "text": "SH", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "SF", "recog_valid": false, "glyph_recog_text": "SH", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349352.jpg", "caption": "a street sign with a street name and a university sign", "annotations": [{"polygon": [[289, 287], [289, 287], [471, 187], [466, 211], [467, 228], [289, 322]], "text": "UNIVERSITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNIVERSITY", "recog_valid": true, "glyph_recog_text": "UNIVERSITY", "glyph_recog_ld": 1.0}, {"polygon": [[2, 140], [0, 173], [172, 241], [170, 209]], "text": "MERALD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EMERALD", "recog_valid": false, "glyph_recog_text": "MERALD", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218282.jpg", "caption": "a beach with umbrellas and chairs and a boat in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349363.jpg", "caption": "a city street with cars and traffic lights", "annotations": [{"polygon": [[451, 148], [452, 158], [485, 126], [482, 112]], "text": "WELLSFARGO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ZOSE", "recog_valid": false, "glyph_recog_text": "WELUEAROO", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480443.jpg", "caption": "a weather vane on top of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349400.jpg", "caption": "a man is taking a picture of himself in a bathroom mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480474.jpg", "caption": "a group of people on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349403.jpg", "caption": "a woman walking down a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480482.jpg", "caption": "an old black and white photo of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349442.jpg", "caption": "a seaplane parked on the shore of a body of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087308.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480538.jpg", "caption": "a group of people on skateboards in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218399.jpg", "caption": "a statue of a boy holding an ice cream cone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349472.jpg", "caption": "a man riding a motorcycle in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218446.jpg", "caption": "a large bus parked on the side of the road", "annotations": [{"polygon": [[471, 189], [467, 226], [441, 220], [430, 183]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "下", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218455.jpg", "caption": "a street with a red bus and a banner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218467.jpg", "caption": "a group of people standing around a stage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480656.jpg", "caption": "a black dog with a teddy bear on the floor", "annotations": [{"polygon": [[289, 86], [289, 139], [503, 141], [501, 86], [289, 85]], "text": "MILES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MILES", "recog_valid": true, "glyph_recog_text": "MILES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480661.jpg", "caption": "a traffic light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087458.jpg", "caption": "australian navy ship in the river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218557.jpg", "caption": "a sign that says confisiere en gras", "annotations": [{"polygon": [[309, 279], [437, 252], [438, 201], [307, 231]], "text": "GROS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GROS", "recog_valid": true, "glyph_recog_text": "GROS", "glyph_recog_ld": 1.0}, {"polygon": [[253, 290], [297, 281], [297, 247], [255, 256]], "text": "EN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EN", "recog_valid": true, "glyph_recog_text": "EN", "glyph_recog_ld": 1.0}, {"polygon": [[243, 293], [247, 239], [218, 238], [136, 257], [47, 288], [39, 320], [45, 329], [178, 303]], "text": "CONFISERIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONEISERIE", "recog_valid": false, "glyph_recog_text": "CONFISERIE", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087501.jpg", "caption": "a man standing on a snow covered slope with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087507.jpg", "caption": "two people are standing in the window at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349658.jpg", "caption": "a woman swinging a bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349709.jpg", "caption": "a woman in a kitchen cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218649.jpg", "caption": "a large white airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218647.jpg", "caption": "a stop sign with a bunch of dead leaves on it", "annotations": [{"polygon": [[197, 35], [302, 31], [321, 2], [188, 1], [180, 18], [188, 31]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S1oP", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218652.jpg", "caption": "a group of girls playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480797.jpg", "caption": "a bicycle parked next to a cruise ship", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349725.jpg", "caption": "a woman holding a trophy and holding her skis", "annotations": [{"polygon": [[415, 428], [415, 448], [466, 455], [479, 441], [476, 432], [438, 431], [428, 425]], "text": "Cup", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Cup", "recog_valid": true, "glyph_recog_text": "Cup", "glyph_recog_ld": 1.0}, {"polygon": [[231, 336], [223, 394], [272, 397], [276, 348]], "text": "Audi", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "(", "recog_valid": false, "glyph_recog_text": "C", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[222, 98], [262, 212], [279, 209], [239, 94]], "text": "BAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CVE", "recog_valid": false, "glyph_recog_text": "BAD", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480812.jpg", "caption": "a man with a beard and no shirt eating a hot dog", "annotations": [{"polygon": [[201, 120], [197, 141], [226, 150], [246, 143], [248, 124]], "text": "HAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAnLi8", "recog_valid": false, "glyph_recog_text": "HAN", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480823.jpg", "caption": "a group of sheep eating hay in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087616.jpg", "caption": "a group of people sitting at desks in front of laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480863.jpg", "caption": "a grandfather clock with a large face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087647.jpg", "caption": "a man and woman sitting on a train", "annotations": [{"polygon": [[283, 143], [343, 147], [345, 114], [283, 111]], "text": "SPECI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPECL", "recog_valid": false, "glyph_recog_text": "SPECI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218759.jpg", "caption": "a boy eating a piece of cake", "annotations": [{"polygon": [[161, 252], [162, 283], [220, 280], [290, 273], [297, 267], [290, 241], [237, 248], [192, 250]], "text": "SASAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SASAMA", "recog_valid": true, "glyph_recog_text": "SASAMA", "glyph_recog_ld": 1.0}, {"polygon": [[173, 444], [148, 467], [159, 474], [189, 480], [202, 489], [234, 495], [249, 504], [289, 505], [304, 497], [309, 487], [309, 479], [302, 471], [289, 469], [241, 459], [204, 456]], "text": "Chien", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "toryo", "recog_valid": false, "glyph_recog_text": "Chien", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[429, 504], [381, 505], [363, 495], [335, 495], [329, 485], [322, 468], [333, 465], [363, 468], [387, 472], [409, 483]], "text": "uh", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "u h", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218763.jpg", "caption": "a clock on a pole in front of a building", "annotations": [{"polygon": [[203, 269], [360, 287], [362, 312], [293, 305], [248, 301], [236, 299], [222, 296], [208, 298], [200, 292]], "text": "GOODERHAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOODERHAM", "recog_valid": true, "glyph_recog_text": "GOODERHAM", "glyph_recog_ld": 1.0}, {"polygon": [[386, 291], [458, 298], [460, 319], [451, 322], [439, 319], [391, 314]], "text": "WORTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WORTS", "recog_valid": true, "glyph_recog_text": "WORTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480914.jpg", "caption": "a group of people walking on the tracks of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218776.jpg", "caption": "a baseball game on tv with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087726.jpg", "caption": "a large passenger jet flying over the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480947.jpg", "caption": "a group of people playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349905.jpg", "caption": "a woman sitting on a bench with two children", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480979.jpg", "caption": "a warning sign and a fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480990.jpg", "caption": "a man in an orange jacket skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349931.jpg", "caption": "a group of people standing in front of a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218863.jpg", "caption": "a display of stuffed bears with prices on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218870.jpg", "caption": "a dog sitting in the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349974.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[416, 72], [410, 100], [511, 103], [510, 79]], "text": "StateFarm", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Stateran", "recog_valid": false, "glyph_recog_text": "StateFarm", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481049.jpg", "caption": "two large airplanes parked on the tarmac with the word anana on them", "annotations": [{"polygon": [[295, 192], [340, 126], [363, 146], [311, 220]], "text": "ANA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANA", "recog_valid": true, "glyph_recog_text": "ANA", "glyph_recog_ld": 1.0}, {"polygon": [[408, 271], [426, 284], [427, 335], [359, 430], [325, 383]], "text": "ANA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ANA", "recog_valid": true, "glyph_recog_text": "ANA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087862.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[321, 236], [449, 236], [440, 281], [307, 279]], "text": "ACME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ACME", "recog_valid": true, "glyph_recog_text": "ACME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350013.jpg", "caption": "a street sign with a hand sign on it", "annotations": [{"polygon": [[146, 141], [147, 123], [197, 104], [198, 122]], "text": "Market", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Market", "recog_valid": true, "glyph_recog_text": "Markel", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350014.jpg", "caption": "a dog walking on the pavement", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087871.jpg", "caption": "a person sitting at a desk with a computer and a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481120.jpg", "caption": "a pair of scissors and a wooden tray with green beans", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218980.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350059.jpg", "caption": "a black and yellow train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218990.jpg", "caption": "a table with a bunch of vegetables and fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350063.jpg", "caption": "a stop sign is seen through a window", "annotations": [{"polygon": [[161, 264], [203, 258], [218, 253], [218, 243], [223, 242], [227, 236], [227, 228], [224, 222], [222, 221], [200, 221], [175, 224], [160, 225], [156, 229], [154, 235], [155, 241], [154, 259]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350086.jpg", "caption": "a gas pump and a motorcycle sitting outside of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219025.jpg", "caption": "a group of people standing around a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219041.jpg", "caption": "a man in white tennis clothes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481209.jpg", "caption": "a street with a fire hydrant and a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481222.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350170.jpg", "caption": "two men playing tennis on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088047.jpg", "caption": "a fire hydrant on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481292.jpg", "caption": "a train on a track with a train station in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350235.jpg", "caption": "a group of men playing basketball on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219173.jpg", "caption": "a street sign with a traffic light and a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219179.jpg", "caption": "a stack of books on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481345.jpg", "caption": "a vase with flowers in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219204.jpg", "caption": "a white toilet sitting next to a black cabinet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481365.jpg", "caption": "a train on the tracks with a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088165.jpg", "caption": "a bus with a large front and back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350331.jpg", "caption": "photographer - johan van der kooi", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350358.jpg", "caption": "two men playing frisbee", "annotations": [{"polygon": [[164, 402], [200, 436], [236, 405], [184, 376]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "rrl", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[278, 392], [310, 346], [355, 398], [326, 449]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219303.jpg", "caption": "a large jetliner taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219322.jpg", "caption": "a man holding a skateboard in a warehouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481467.jpg", "caption": "a man standing next to an elephant in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219335.jpg", "caption": "two pictures of a soccer player kicking the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088282.jpg", "caption": "a collage of pictures of food on plates", "annotations": [{"polygon": [[23, 432], [134, 398], [144, 418], [31, 456]], "text": "SUKJAI ", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "SUKJAI", "recog_valid": false, "glyph_recog_text": "SUKJAI", "glyph_recog_ld": 1.0}, {"polygon": [[72, 449], [146, 424], [152, 435], [76, 462]], "text": "Resaturant", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Restaurant", "recog_valid": false, "glyph_recog_text": "Resabuelant", "glyph_recog_ld": 0.6363639669418482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219355.jpg", "caption": "a baseball game with a batter and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481530.jpg", "caption": "a donut with sprinkles and a cup of coffee", "annotations": [{"polygon": [[190, 97], [195, 120], [246, 117], [282, 110], [279, 88], [241, 95]], "text": "CURLY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CURLY'S", "recog_valid": true, "glyph_recog_text": "CURLY'S", "glyph_recog_ld": 1.0}, {"polygon": [[118, 164], [122, 185], [150, 182], [184, 168], [179, 153], [163, 159]], "text": "Campbell's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "bampbelli", "recog_valid": false, "glyph_recog_text": "Campbells", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481532.jpg", "caption": "a yellow fire hydrant", "annotations": [{"polygon": [[93, 166], [55, 190], [75, 225], [112, 258], [139, 224], [113, 202], [101, 184]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "P4E", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[153, 231], [139, 265], [168, 275], [209, 279], [262, 278], [296, 265], [334, 239], [324, 217], [319, 199], [292, 216], [265, 228], [218, 234], [191, 235]], "text": "CORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "COREA", "recog_valid": false, "glyph_recog_text": "CORE", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350481.jpg", "caption": "a cat laying on a desk next to a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350497.jpg", "caption": "a man on a red scooter with a grill on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481571.jpg", "caption": "a road closed sign is shown in front of a parking lot", "annotations": [{"polygon": [[178, 230], [284, 221], [288, 260], [184, 279]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}, {"polygon": [[151, 320], [303, 277], [309, 314], [162, 368]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219435.jpg", "caption": "a small airplane sitting on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481585.jpg", "caption": "a surfer in a red shirt riding a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350514.jpg", "caption": "a train is pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481586.jpg", "caption": "a woman standing on a skateboard with her legs up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088377.jpg", "caption": "1921 british motorcycle - no number", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088388.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[240, 203], [234, 211], [248, 223], [253, 229], [257, 234], [263, 243], [272, 240], [268, 231], [262, 223], [252, 212], [241, 203]], "text": "SELLERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SELLERS", "recog_valid": true, "glyph_recog_text": "SELLERS", "glyph_recog_ld": 1.0}, {"polygon": [[235, 221], [215, 244], [236, 263], [256, 243]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "S", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481607.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[176, 153], [201, 143], [220, 166], [191, 176]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481631.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219488.jpg", "caption": "a black and white photo of a man on a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219489.jpg", "caption": "a giraffe standing next to a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350585.jpg", "caption": "a man standing outside of a pizza shop", "annotations": [{"polygon": [[96, 86], [105, 95], [170, 119], [173, 91], [157, 87]], "text": "ARS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AS", "recog_valid": false, "glyph_recog_text": "ARS", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[176, 86], [186, 136], [265, 155], [267, 152], [258, 128], [215, 110], [207, 86]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Pozz", "recog_valid": false, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481663.jpg", "caption": "a baseball pitcher in the middle of throwing a pitch", "annotations": [{"polygon": [[218, 149], [225, 190], [264, 185], [259, 142]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088449.jpg", "caption": "two trains are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219535.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[195, 82], [276, 75], [278, 97], [239, 107], [194, 110]], "text": "MLR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MLR", "recog_valid": true, "glyph_recog_text": "MLR", "glyph_recog_ld": 1.0}, {"polygon": [[292, 74], [465, 64], [511, 65], [510, 95], [293, 107]], "text": "NETWOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "NETWOR", "recog_valid": true, "glyph_recog_text": "NETWOR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350620.jpg", "caption": "a woman bending down to pick up a tennis ball", "annotations": [{"polygon": [[362, 133], [437, 146], [435, 164], [425, 167], [358, 157], [356, 150], [359, 136]], "text": "wilson", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Wioon", "recog_valid": false, "glyph_recog_text": "wilson", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[331, 114], [390, 85], [448, 85], [418, 99], [419, 111], [400, 120], [398, 109], [330, 150]], "text": "Schwepp", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Schwepp", "recog_valid": true, "glyph_recog_text": "Schwepp", "glyph_recog_ld": 1.0}, {"polygon": [[435, 333], [445, 378], [412, 379], [402, 341]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088477.jpg", "caption": "a cow standing in the middle of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481702.jpg", "caption": "three people cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350633.jpg", "caption": "a train traveling down the tracks with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219580.jpg", "caption": "a street with two buildings and a parking sign", "annotations": [{"polygon": [[380, 135], [382, 174], [351, 177], [347, 139]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "a", "recog_valid": false, "glyph_recog_text": "Q", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481749.jpg", "caption": "a dog jumping into the air to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219606.jpg", "caption": "a dog standing in a field with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350690.jpg", "caption": "a pizza with olives and cheese on a white box", "annotations": [{"polygon": [[2, 432], [82, 432], [84, 350], [81, 315], [81, 233], [63, 235], [29, 238], [24, 256], [1, 231]], "text": "Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "9220", "recog_valid": false, "glyph_recog_text": "Uo.", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088549.jpg", "caption": "a train on the tracks with a yellow and black train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481772.jpg", "caption": "a yellow school bus parked in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088576.jpg", "caption": "a woman talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219654.jpg", "caption": "a school bus parked in front of a church", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481804.jpg", "caption": "a man sitting on a couch with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481807.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481810.jpg", "caption": "a black and white photo of a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350741.jpg", "caption": "a pizza with toppings on it on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219687.jpg", "caption": "a living room with two leather chairs and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350759.jpg", "caption": "a train traveling down the tracks under a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481851.jpg", "caption": "a desk with a laptop, a computer monitor, a keyboard, a mouse and a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350810.jpg", "caption": "a cat sleeping on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350815.jpg", "caption": "a jockey is on a horse with a man in a suit", "annotations": [{"polygon": [[48, 355], [67, 355], [93, 298], [88, 296], [72, 302], [51, 344]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350819.jpg", "caption": "a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481894.jpg", "caption": "a man on a motorcycle in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350837.jpg", "caption": "a woman in a blue dress holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481920.jpg", "caption": "two men playing tennis on a blue court with a large crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481931.jpg", "caption": "two men cutting a cake with an american flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219796.jpg", "caption": "a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219802.jpg", "caption": "a bus driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088730.jpg", "caption": "a man in a white shirt and black shorts is about to hit a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350878.jpg", "caption": "a person holding a small piece of jewelry with the words i love miniatures", "annotations": [{"polygon": [[308, 294], [312, 328], [495, 328], [492, 285]], "text": "miniatures", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "miniatures", "recog_valid": true, "glyph_recog_text": "miniatures", "glyph_recog_ld": 1.0}, {"polygon": [[357, 233], [358, 270], [446, 269], [443, 232]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LOVE", "recog_valid": true, "glyph_recog_text": "LOVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481954.jpg", "caption": "a person sitting on a couch watching a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000350885.jpg", "caption": "a train on the tracks in front of a red building", "annotations": [{"polygon": [[207, 309], [278, 308], [279, 272], [208, 272], [202, 278], [198, 287], [197, 296], [200, 304], [203, 308]], "text": "CSX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CSX", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219855.jpg", "caption": "mercedes-benz m-class military vehicle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088802.jpg", "caption": "a small airplane with a propeller on the ground", "annotations": [{"polygon": [[319, 264], [272, 291], [260, 302], [280, 330], [440, 331], [453, 320], [455, 284], [408, 284], [380, 275], [344, 261]], "text": "florzon", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "学larzan", "recog_valid": false, "glyph_recog_text": "florzon", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088803.jpg", "caption": "a girl is cutting out a picture of a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482036.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[205, 228], [233, 221], [238, 246], [209, 252]], "text": "19", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "19", "recog_valid": true, "glyph_recog_text": "19", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219910.jpg", "caption": "a group of people sitting on motorcycles in a line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088845.jpg", "caption": "a jockey is sitting on a horse with a man in a green shirt", "annotations": [{"polygon": [[338, 398], [374, 387], [384, 410], [393, 437], [400, 481], [367, 491]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "6", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219921.jpg", "caption": "a person on a dirt bike doing a jump", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351002.jpg", "caption": "a man is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219935.jpg", "caption": "a man walking on a platform with a train approaching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482079.jpg", "caption": "a train engine on the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482084.jpg", "caption": "a table with bananas, peanut butter, and other food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219958.jpg", "caption": "a man wearing a banana on his head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482127.jpg", "caption": "a bird is perched on top of a book", "annotations": [{"polygon": [[381, 273], [427, 254], [432, 265], [385, 285]], "text": "SUBJECT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SUBJECT", "recog_valid": true, "glyph_recog_text": "SUBJECT", "glyph_recog_ld": 1.0}, {"polygon": [[363, 298], [426, 271], [430, 279], [366, 307]], "text": "THE SUBJECT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CERTAINTY", "recog_valid": false, "glyph_recog_text": "配些损T", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[342, 403], [344, 413], [391, 388], [389, 381]], "text": "Jacques-Alain A", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Jacques-Alain", "recog_valid": false, "glyph_recog_text": "waaaAan", "glyph_recog_ld": 0.30769284023627674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351112.jpg", "caption": "united airlines boeing 737-800 nr 807", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220062.jpg", "caption": "a silver flip phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351139.jpg", "caption": "a cake shaped like a toilet with the words good luck", "annotations": [{"polygon": [[232, 229], [232, 229], [237, 248], [268, 276], [269, 286], [280, 287], [284, 280], [284, 275], [278, 268], [269, 261], [260, 248], [239, 229]], "text": "50 Today", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "K8cy", "recog_valid": false, "glyph_recog_text": "50 Today", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[248, 175], [282, 195], [290, 224], [284, 248], [282, 249], [275, 245], [273, 228], [238, 196]], "text": "Andy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aen dhnyg", "recog_valid": false, "glyph_recog_text": "Andy", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351157.jpg", "caption": "a bunch of sandwiches on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220089.jpg", "caption": "1946 chevrolet pickup for sale in for sale on gocars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351176.jpg", "caption": "a woman playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482251.jpg", "caption": "a woman and a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482252.jpg", "caption": "a man sitting on a motorcycle", "annotations": [{"polygon": [[264, 279], [264, 279], [265, 284], [319, 259], [316, 253]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROULOE", "recog_valid": false, "glyph_recog_text": "Ralicr", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220115.jpg", "caption": "four men pose for a photo on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351218.jpg", "caption": "a refrigerator with a door open in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351221.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482295.jpg", "caption": "a group of police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482300.jpg", "caption": "a red and white truck with a safety truck on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482332.jpg", "caption": "a group of people sitting on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351283.jpg", "caption": "a red train is on the tracks in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482371.jpg", "caption": "a golden bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220234.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043774.jpg", "caption": "a birthday cake with red and white icing", "annotations": [{"polygon": [[173, 147], [173, 147], [185, 144], [211, 147], [232, 150], [260, 156], [264, 175], [251, 175], [247, 164], [178, 169]], "text": "Protecting", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pusteting", "recog_valid": false, "glyph_recog_text": "Protecting", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[158, 227], [204, 230], [213, 236], [218, 236], [226, 235], [226, 230], [224, 209], [185, 210], [176, 199], [155, 201], [152, 209]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Nang", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[284, 192], [281, 218], [293, 221], [316, 216], [338, 217], [346, 221], [356, 215], [368, 228], [377, 231], [382, 229], [379, 206], [354, 199], [336, 198], [328, 198], [323, 203], [311, 201], [306, 199], [304, 195]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Birdteay", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220284.jpg", "caption": "two men playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220289.jpg", "caption": "two men standing in front of a display of urinals", "annotations": [{"polygon": [[309, 130], [309, 96], [511, 77]], "text": "SLOAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "se", "recog_valid": false, "glyph_recog_text": "S48AN", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[317, 98], [315, 129], [511, 117], [510, 79]], "text": "SLOAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "SLOAN", "recog_valid": true, "glyph_recog_text": "SLOAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482433.jpg", "caption": "a bathroom with a toilet and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482439.jpg", "caption": "a woman holding up a bunch of bananas", "annotations": [{"polygon": [[204, 311], [190, 307], [183, 314], [185, 328], [191, 334], [200, 335], [205, 336], [224, 338], [264, 340], [285, 339], [296, 337], [306, 344], [310, 340], [316, 324], [321, 311], [299, 313], [276, 321], [260, 320], [239, 320], [220, 319], [206, 316]], "text": "Guaranty", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Guaianty", "recog_valid": false, "glyph_recog_text": "Guaranty", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[425, 329], [429, 340], [416, 348], [408, 355], [405, 371], [408, 392], [399, 385], [397, 369], [399, 354], [408, 341], [415, 334]], "text": "SPECIALIZED", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "SPECIALIEEO", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351397.jpg", "caption": "a box of stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351404.jpg", "caption": "a bunch of bananas in a shopping cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482479.jpg", "caption": "a traffic light and a street sign", "annotations": [{"polygon": [[242, 258], [238, 300], [319, 281], [321, 237]], "text": "KING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KING", "recog_valid": true, "glyph_recog_text": "KING", "glyph_recog_ld": 1.0}, {"polygon": [[409, 211], [406, 258], [453, 245], [456, 200]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[1, 337], [-1, 369], [59, 349], [64, 315], [35, 325], [16, 331]], "text": "RKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PN", "recog_valid": false, "glyph_recog_text": "RKING", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220366.jpg", "caption": "a man swinging a tennis racket at a ball", "annotations": [{"polygon": [[283, 152], [500, 151], [501, 286], [272, 286]], "text": "BA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BA", "recog_valid": true, "glyph_recog_text": "BA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000089318.jpg", "caption": "a cutting board with carrots, celery, and other vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220390.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482534.jpg", "caption": "a red train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482545.jpg", "caption": "a cat drinking from a toilet bowl in a bathroom", "annotations": [{"polygon": [[418, 338], [421, 367], [442, 365], [444, 371], [451, 370], [452, 351], [450, 350]], "text": "Secret", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Te", "recog_valid": false, "glyph_recog_text": "Secre!", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[416, 362], [412, 387], [427, 393], [432, 390], [457, 397], [457, 388], [453, 387], [450, 374]], "text": "Agenda", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Louae", "recog_valid": false, "glyph_recog_text": "Agenda", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351479.jpg", "caption": "a person using a mouse on a computer keyboard", "annotations": [{"polygon": [[388, 74], [385, 109], [385, 111], [420, 117], [422, 80]], "text": "8068", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "登", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351539.jpg", "caption": "a man standing next to a bunch of surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220487.jpg", "caption": "cowboys and cattle in the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220502.jpg", "caption": "a group of people standing near a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220505.jpg", "caption": "a woman and a child are standing near a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482659.jpg", "caption": "two women hugging on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482671.jpg", "caption": "a plate of food with broccoli, tofu and mushrooms", "annotations": [{"polygon": [[270, 82], [265, 100], [311, 116], [326, 103]], "text": "Dainty", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Dainty", "recog_valid": true, "glyph_recog_text": "Dainty", "glyph_recog_ld": 1.0}, {"polygon": [[338, 105], [326, 117], [382, 152], [393, 139]], "text": "Sichuan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sichuan", "recog_valid": true, "glyph_recog_text": "Sichuan", "glyph_recog_ld": 1.0}, {"polygon": [[400, 149], [387, 159], [419, 187], [430, 180], [412, 159]], "text": "Food", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Food", "recog_valid": true, "glyph_recog_text": "Food", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482675.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[228, 279], [239, 306], [261, 303], [269, 306], [286, 309], [294, 312], [305, 315], [318, 318], [330, 323], [344, 298], [308, 289], [295, 286], [279, 282], [263, 278], [250, 276]], "text": "HARRISON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARRISON", "recog_valid": true, "glyph_recog_text": "HARRISON", "glyph_recog_ld": 1.0}, {"polygon": [[243, 307], [238, 309], [237, 324], [244, 342], [254, 349], [257, 357], [262, 363], [273, 368], [284, 371], [290, 371], [307, 365], [313, 350], [322, 333], [319, 321]], "text": "26", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "26", "recog_valid": true, "glyph_recog_text": "26", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351608.jpg", "caption": "a group of men cutting a cake", "annotations": [{"polygon": [[350, 487], [305, 510], [289, 502], [340, 478]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PropuC", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351622.jpg", "caption": "a man with glasses and a colorful shirt is holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482696.jpg", "caption": "a kitchen shelf with knives and spices on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220571.jpg", "caption": "a boy reading a book in bed", "annotations": [{"polygon": [[155, 178], [194, 196], [193, 211], [157, 194]], "text": "TIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TUNE", "recog_valid": false, "glyph_recog_text": "TIME", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[135, 184], [215, 224], [220, 238], [217, 253], [206, 260], [138, 223]], "text": "BOOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIC", "recog_valid": false, "glyph_recog_text": "BOOK", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[136, 225], [139, 223], [194, 253], [196, 265], [140, 237]], "text": "BOOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BOOK", "recog_valid": true, "glyph_recog_text": "8oOK", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[138, 238], [208, 272], [222, 279], [214, 303], [140, 262]], "text": "HOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HO", "recog_valid": false, "glyph_recog_text": "HOW", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482722.jpg", "caption": "a yellow and blue train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000089521.jpg", "caption": "a bowl of food on a table with a can of soda", "annotations": [{"polygon": [[342, 140], [338, 146], [342, 165], [347, 179], [355, 192], [376, 208], [378, 204], [363, 191], [354, 179], [347, 165]], "text": "CONU WATER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "CONU WATER", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[406, 198], [399, 203], [372, 188], [358, 168], [350, 144], [350, 117], [360, 106], [359, 116], [358, 127], [359, 147], [369, 169], [383, 184]], "text": "NY OR MA CT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "NY OR MA CT", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220605.jpg", "caption": "two men are playing soccer on a field", "annotations": [{"polygon": [[512, 99], [474, 92], [455, 76], [455, 69], [512, 69]], "text": "Wh", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "WA", "recog_valid": false, "glyph_recog_text": "Wh", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220615.jpg", "caption": "a street sweeper is parked on the side of the road", "annotations": [{"polygon": [[392, 226], [392, 226], [402, 220], [418, 218], [438, 230], [447, 236], [453, 226], [444, 217], [425, 207], [415, 205], [401, 205], [384, 213]], "text": "LIMPIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WMPIO", "recog_valid": false, "glyph_recog_text": "LIMPIO", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482765.jpg", "caption": "a small plane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220629.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482775.jpg", "caption": "a plate of food with rice and vegetables on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482798.jpg", "caption": "an old photo of a plane on the ground", "annotations": [{"polygon": [[305, 240], [345, 256], [332, 293], [292, 268]], "text": "35", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "35", "recog_valid": true, "glyph_recog_text": "35", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351726.jpg", "caption": "a group of men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220654.jpg", "caption": "a dump truck is parked next to a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482802.jpg", "caption": "a small kitchen with a stove and a table", "annotations": [{"polygon": [[353, 80], [485, 82], [494, 102], [491, 114], [361, 115], [348, 111], [347, 85], [347, 85]], "text": "STING", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "STING", "recog_valid": true, "glyph_recog_text": "STING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482819.jpg", "caption": "a train is pulling into a station with people on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482872.jpg", "caption": "a woman sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482877.jpg", "caption": "a vase with flowers in it", "annotations": [{"polygon": [[133, 462], [133, 462], [125, 494], [192, 492], [192, 465]], "text": "Angeis", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Angeis", "recog_valid": true, "glyph_recog_text": "Angeis", "glyph_recog_ld": 1.0}, {"polygon": [[199, 465], [199, 465], [198, 490], [321, 490], [329, 459], [198, 458]], "text": "Photographv", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Photographv", "recog_valid": true, "glyph_recog_text": "Photograph", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220736.jpg", "caption": "a bathroom with a large tub and a shower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482885.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[435, 369], [436, 406], [477, 406], [475, 367]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "R", "recog_valid": true, "glyph_recog_text": "R", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220763.jpg", "caption": "a group of people sitting under umbrellas at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482913.jpg", "caption": "a man eating a slice of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220772.jpg", "caption": "two men playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220795.jpg", "caption": "a woman pouring wine into a glass", "annotations": [{"polygon": [[356, 357], [356, 340], [342, 346], [342, 340], [359, 330], [385, 328], [386, 334], [391, 344], [389, 360], [383, 381], [378, 376], [378, 362], [371, 358], [365, 357]], "text": "Ref", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MA", "recog_valid": false, "glyph_recog_text": "Ref", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482940.jpg", "caption": "a row of luggage", "annotations": [{"polygon": [[34, 325], [36, 350], [68, 338], [65, 319]], "text": "13.00", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "130", "recog_valid": false, "glyph_recog_text": "13.00", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000482959.jpg", "caption": "a man dressed in an orange and green suit and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220821.jpg", "caption": "a baby giraffe is eating", "annotations": [{"polygon": [[506, 424], [504, 382], [445, 382], [445, 424]], "text": "Ag", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "AG", "recog_valid": false, "glyph_recog_text": "A g", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220823.jpg", "caption": "a fruit and vegetable stand with many different fruits and vegetables", "annotations": [{"polygon": [[350, 214], [347, 249], [369, 256], [384, 244], [403, 241], [403, 211]], "text": "399", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "399", "recog_valid": true, "glyph_recog_text": "399", "glyph_recog_ld": 1.0}, {"polygon": [[190, 226], [185, 264], [209, 258], [224, 256], [227, 228]], "text": "99", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "99", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[100, 204], [102, 235], [108, 237], [120, 231], [145, 227], [148, 219], [147, 209], [140, 199]], "text": "199", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "199", "recog_valid": true, "glyph_recog_text": "199", "glyph_recog_ld": 1.0}, {"polygon": [[3, 208], [1, 239], [36, 234], [34, 210]], "text": "100", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "100", "recog_valid": true, "glyph_recog_text": "100", "glyph_recog_ld": 1.0}, {"polygon": [[85, 77], [80, 108], [86, 101], [95, 101], [118, 103], [118, 77]], "text": "150", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "150", "recog_valid": true, "glyph_recog_text": "150", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220834.jpg", "caption": "a bunch of bananas", "annotations": [{"polygon": [[392, 263], [341, 293], [250, 211], [288, 189], [342, 208]], "text": "Dole", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Dole", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220838.jpg", "caption": "a white wii remote and a cord", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220860.jpg", "caption": "a man holding a bunch of bananas in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000351933.jpg", "caption": "a small alarm clock sitting on a shelf next to a plant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483008.jpg", "caption": "a young girl sitting on a bench outside a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220894.jpg", "caption": "a man on a scooter is stopped at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000089848.jpg", "caption": "a cat drinking from a cup", "annotations": [{"polygon": [[99, 305], [132, 322], [130, 335], [101, 324]], "text": "gs", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NOD", "recog_valid": false, "glyph_recog_text": "gs", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220922.jpg", "caption": "a cat sitting on a desk next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483070.jpg", "caption": "a bus is parked in front of a store window", "annotations": [{"polygon": [[221, 130], [221, 221], [340, 219], [341, 135]], "text": "GO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GO", "recog_valid": true, "glyph_recog_text": "GO", "glyph_recog_ld": 1.0}, {"polygon": [[207, 223], [207, 265], [352, 259], [351, 221]], "text": "SPURS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPURS", "recog_valid": true, "glyph_recog_text": "SPURS", "glyph_recog_ld": 1.0}, {"polygon": [[221, 267], [221, 355], [340, 343], [340, 262]], "text": "GO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GO", "recog_valid": true, "glyph_recog_text": "GO", "glyph_recog_ld": 1.0}, {"polygon": [[203, 66], [152, 62], [152, 96], [202, 98]], "text": "05", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "0D", "recog_valid": false, "glyph_recog_text": "05", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483078.jpg", "caption": "a baseball player standing on a mound", "annotations": [{"polygon": [[380, 163], [379, 212], [412, 215], [408, 162]], "text": "Sox", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "UPS", "recog_valid": false, "glyph_recog_text": "0c", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352019.jpg", "caption": "a double decker bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483107.jpg", "caption": "a traffic light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000089914.jpg", "caption": "a man taking a picture of himself in a bathroom mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220988.jpg", "caption": "a group of children sitting at a table with paper and scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000220989.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483138.jpg", "caption": "a skateboard with a blue and yellow eye on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352088.jpg", "caption": "a man in a suit and tie standing at a podium", "annotations": [{"polygon": [[330, 154], [322, 177], [412, 163], [408, 147]], "text": "American", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Amcrican", "recog_valid": false, "glyph_recog_text": "American", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483213.jpg", "caption": "a man holding up a cell phone in front of a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352162.jpg", "caption": "a clock with a horse and a horse statue", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352182.jpg", "caption": "a man sitting on a toilet with a gnome on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352185.jpg", "caption": "a man riding a bicycle on a street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221120.jpg", "caption": "a woman sitting on a curb eating a sandwich", "annotations": [{"polygon": [[185, 474], [188, 488], [228, 467], [222, 458], [208, 462], [190, 469], [186, 472]], "text": "Pain", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Pain", "recog_valid": true, "glyph_recog_text": "Pain", "glyph_recog_ld": 1.0}, {"polygon": [[249, 442], [255, 452], [286, 433], [281, 422]], "text": "appetit", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "appett", "recog_valid": false, "glyph_recog_text": "i", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221119.jpg", "caption": "two women standing on a bridge with luggage", "annotations": [{"polygon": [[98, 12], [100, 82], [163, 84], [160, 21]], "text": "CLark ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Clark", "recog_valid": false, "glyph_recog_text": "!", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[170, 26], [172, 84], [220, 89], [218, 33]], "text": "Lake", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "号", "recog_valid": false, "glyph_recog_text": "J5.", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352205.jpg", "caption": "a dog holding a frisbee in its mouth", "annotations": [{"polygon": [[230, 285], [184, 312], [170, 298], [191, 278], [221, 270]], "text": "ippy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sadtre", "recog_valid": false, "glyph_recog_text": "ippy", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[187, 320], [193, 330], [199, 363], [185, 372], [158, 348], [168, 312]], "text": "flopper", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "lopper", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483334.jpg", "caption": "a street sign with a sign that says north end", "annotations": [{"polygon": [[131, 184], [208, 181], [207, 208], [128, 211]], "text": "NORTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NORTH", "recog_valid": true, "glyph_recog_text": "NORTH", "glyph_recog_ld": 1.0}, {"polygon": [[221, 179], [310, 177], [311, 205], [221, 208]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}, {"polygon": [[143, 243], [223, 241], [223, 271], [141, 273]], "text": "NORTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NORTH", "recog_valid": true, "glyph_recog_text": "NORTH", "glyph_recog_ld": 1.0}, {"polygon": [[238, 240], [287, 238], [288, 269], [238, 271]], "text": "END", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "END", "recog_valid": true, "glyph_recog_text": "END", "glyph_recog_ld": 1.0}, {"polygon": [[146, 379], [374, 375], [378, 449], [146, 448]], "text": "TOBIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TOBIN", "recog_valid": true, "glyph_recog_text": "TOBIN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221223.jpg", "caption": "a television set with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483377.jpg", "caption": "a white truck with a japanese flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221241.jpg", "caption": "a red double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221243.jpg", "caption": "a tow truck with a flatbed on the back of it", "annotations": [{"polygon": [[349, 141], [343, 168], [494, 162], [505, 140], [494, 131], [437, 134], [436, 128], [423, 131], [421, 136]], "text": "metro", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "metro", "recog_valid": true, "glyph_recog_text": "metro", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000221252.jpg", "caption": "a black and white photo of two people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000352360.jpg", "caption": "a young boy sitting at a table with a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000483449.jpg", "caption": "a woman on skis standing in the snow", "annotations": [{"polygon": [[480, 433], [479, 329], [510, 329], [508, 435]], "text": "BANSKO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CN/2", "recog_valid": false, "glyph_recog_text": "m", "recog_valid": false, "glyph_recog_text": "LOL", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095592.jpg", "caption": "a small green car parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095622.jpg", "caption": "a small airplane parked on the grass with people standing around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488844.jpg", "caption": "a bus is parked next to a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226702.jpg", "caption": "a blue and yellow train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357774.jpg", "caption": "a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226708.jpg", "caption": "a bus parked in a parking lot next to a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488857.jpg", "caption": "a house with a tree in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357790.jpg", "caption": "a yellow fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095659.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488883.jpg", "caption": "a stop sign with a bunch of stickers on it", "annotations": [{"polygon": [[200, 48], [203, 94], [296, 106], [294, 72], [265, 57]], "text": "Main", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Main", "recog_valid": true, "glyph_recog_text": "Main", "glyph_recog_ld": 1.0}, {"polygon": [[215, 166], [218, 208], [223, 208], [314, 167], [315, 133], [235, 168], [226, 164]], "text": "Cameron", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cameron", "recog_valid": true, "glyph_recog_text": "Cameron", "glyph_recog_ld": 1.0}, {"polygon": [[320, 288], [315, 299], [361, 334], [366, 325]], "text": "BRANDED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRANDED", "recog_valid": true, "glyph_recog_text": "BRANDED", "glyph_recog_ld": 1.0}, {"polygon": [[149, 334], [157, 343], [192, 295], [184, 288]], "text": "BRANDED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRANDED", "recog_valid": true, "glyph_recog_text": "BRAADED", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357810.jpg", "caption": "a hamburger and french fries on a tray", "annotations": [{"polygon": [[150, 31], [163, 27], [178, 22], [188, 21], [200, 22], [197, 0], [185, -2], [178, 1], [166, 3], [156, 4], [145, 5], [139, 5], [147, 29]], "text": "YELLOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "YELLOW", "recog_valid": true, "glyph_recog_text": "YELLOW", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226747.jpg", "caption": "a woman in a white shirt and black shorts is hitting a tennis ball", "annotations": [{"polygon": [[305, 197], [340, 197], [356, 225], [311, 229]], "text": "JJUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "JJUS", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[329, 229], [323, 266], [359, 264], [359, 227]], "text": "DO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[41, 260], [34, 391], [278, 404], [268, 272]], "text": "Morg", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Morg", "recog_valid": true, "glyph_recog_text": "Morg", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095676.jpg", "caption": "a baseball player sliding into base while another is trying to catch the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357837.jpg", "caption": "a woman sitting on the floor next to a refrigerator", "annotations": [{"polygon": [[342, 307], [368, 270], [379, 286], [349, 327]], "text": "ROBBIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROBRW", "recog_valid": false, "glyph_recog_text": "ROBBIN", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095711.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[273, 145], [265, 133], [278, 121], [300, 115], [310, 115], [321, 117], [336, 122], [343, 129], [348, 138], [333, 147], [324, 136], [316, 132], [308, 131], [300, 131], [290, 133], [282, 135]], "text": "FRANCOEUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRANCUEIO", "recog_valid": false, "glyph_recog_text": "FRANCOEUR", "glyph_recog_ld": 0.5555560493821674}, {"polygon": [[280, 172], [280, 172], [314, 174], [314, 170], [319, 137], [286, 136]], "text": "21", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "21", "recog_valid": true, "glyph_recog_text": "2", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357859.jpg", "caption": "a bear standing in front of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488941.jpg", "caption": "a bathroom with a mural on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357877.jpg", "caption": "a man and a woman talking on cell phones", "annotations": [{"polygon": [[338, 340], [346, 364], [428, 335], [418, 312], [338, 340]], "text": "MICHIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MICHIG", "recog_valid": true, "glyph_recog_text": "MICHIG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226812.jpg", "caption": "a baseball player standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226814.jpg", "caption": "a yellow school bus parked in the dirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488964.jpg", "caption": "a person is using a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226825.jpg", "caption": "a black and white photo of people on bikes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357898.jpg", "caption": "a woman talking on a cell phone", "annotations": [{"polygon": [[270, 342], [326, 333], [335, 384], [282, 394], [275, 389], [266, 350]], "text": "GI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GI", "recog_valid": true, "glyph_recog_text": "GI", "glyph_recog_ld": 1.0}, {"polygon": [[178, 353], [181, 349], [193, 350], [204, 377], [205, 391], [199, 397], [175, 399], [171, 393]], "text": "G", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "四", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000488990.jpg", "caption": "a city street with people crossing the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000357971.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[291, 230], [297, 259], [168, 297], [162, 231], [289, 220]], "text": "Ozioles 46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cierbs", "recog_valid": false, "glyph_recog_text": "Ozioles 46", "glyph_recog_ld": 0.20000079999919995}, {"polygon": [[285, 274], [299, 277], [296, 314], [241, 310], [241, 270]], "text": "46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "46", "recog_valid": true, "glyph_recog_text": "46", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358020.jpg", "caption": "a large stone building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358033.jpg", "caption": "a black and white photo of buses parked in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489107.jpg", "caption": "a man riding a bicycle with a police officer behind him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226966.jpg", "caption": "a sandwich with onions and cheese on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358043.jpg", "caption": "a man riding a skateboard down a ramp at night", "annotations": [{"polygon": [[122, 148], [145, 163], [147, 166], [155, 168], [164, 175], [170, 167], [164, 162], [161, 160], [157, 151], [155, 148], [151, 154], [128, 139]], "text": "DESAEI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DESAVEI", "recog_valid": false, "glyph_recog_text": "DESAEI", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489117.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[409, 224], [411, 244], [462, 218], [460, 199]], "text": "CLINTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CLINTON", "recog_valid": true, "glyph_recog_text": "CLINTON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489123.jpg", "caption": "a small airplane sitting on a sandy beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000226983.jpg", "caption": "a baseball player is at bat and the catcher is behind him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095916.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227000.jpg", "caption": "a woman standing next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489155.jpg", "caption": "a man in a hat talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358085.jpg", "caption": "a cat watching a soccer game on a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358118.jpg", "caption": "a jet fighter flying in the sky with smoke coming out of its tail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000095989.jpg", "caption": "a dog is chasing sheep in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227069.jpg", "caption": "a stop sign is shown in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227109.jpg", "caption": "a baseball player catching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358198.jpg", "caption": "a stuffed toy with a pink hair and a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358206.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[148, 31], [183, 26], [179, 0], [143, -1], [141, 21]], "text": "ES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "ES", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096067.jpg", "caption": "a little girl looking at a jar of eggs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358223.jpg", "caption": "a bundt cake on a plate with a knife and fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489297.jpg", "caption": "a british airways plane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358239.jpg", "caption": "a sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096106.jpg", "caption": "a delta airplane is parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358265.jpg", "caption": "an elephant is standing on the road next to a jeep", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489343.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[170, 213], [171, 235], [183, 239], [244, 243], [244, 218], [175, 210]], "text": "QUEEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QUEEN", "recog_valid": true, "glyph_recog_text": "QUEEN", "glyph_recog_ld": 1.0}, {"polygon": [[256, 221], [255, 245], [371, 258], [369, 233]], "text": "ELIZABETH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ELIZABETH", "recog_valid": true, "glyph_recog_text": "ELIZABETH", "glyph_recog_ld": 1.0}, {"polygon": [[399, 329], [400, 356], [341, 394], [340, 370]], "text": "ELIZABETH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ELIZABETH", "recog_valid": true, "glyph_recog_text": "ELIZABETH", "glyph_recog_ld": 1.0}, {"polygon": [[302, 394], [303, 420], [335, 399], [335, 375]], "text": "QUEEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OUEEN", "recog_valid": false, "glyph_recog_text": "QUEEN", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096130.jpg", "caption": "a desk with two laptops and a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358289.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227231.jpg", "caption": "a white and red bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227236.jpg", "caption": "a bed with a white sheet and a light on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227250.jpg", "caption": "a person in a pink shirt riding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227337.jpg", "caption": "a man wearing a london new york berlin tokyo sweatshirt", "annotations": [{"polygon": [[188, 227], [253, 262], [341, 296], [334, 323], [249, 294], [178, 259]], "text": "LONDON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LONDON", "recog_valid": true, "glyph_recog_text": "LONDON", "glyph_recog_ld": 1.0}, {"polygon": [[171, 267], [171, 267], [227, 292], [217, 319], [164, 299], [168, 272]], "text": "NEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEW", "recog_valid": true, "glyph_recog_text": "NEW", "glyph_recog_ld": 1.0}, {"polygon": [[235, 300], [368, 339], [358, 365], [352, 366], [232, 332]], "text": "YORK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YORK", "recog_valid": true, "glyph_recog_text": "YORK", "glyph_recog_ld": 1.0}, {"polygon": [[182, 318], [243, 345], [311, 366], [304, 394], [275, 390], [177, 351]], "text": "BERLIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BERLIN", "recog_valid": true, "glyph_recog_text": "BERLIN", "glyph_recog_ld": 1.0}, {"polygon": [[175, 359], [175, 359], [232, 385], [270, 400], [301, 406], [296, 436], [241, 427], [205, 412], [179, 393]], "text": "TOKYO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "TOKYO", "recog_valid": true, "glyph_recog_text": "TOKYO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227363.jpg", "caption": "a large military plane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489510.jpg", "caption": "a refrigerator with many stickers on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489524.jpg", "caption": "three boats are parked in front of a hut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096311.jpg", "caption": "a person holding a blackberry phone with a barcode on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227402.jpg", "caption": "a black and white photo of a football player sitting on a bench", "annotations": [{"polygon": [[116, 281], [116, 281], [122, 304], [126, 307], [136, 304], [141, 300], [150, 299], [153, 295], [149, 284], [144, 272], [137, 275], [130, 277]], "text": "75", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "25", "recog_valid": false, "glyph_recog_text": "75", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096338.jpg", "caption": "people walking in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489554.jpg", "caption": "a bus driving down a road with a few trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227456.jpg", "caption": "a black and white photo of people standing under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227484.jpg", "caption": "a red convertible car is parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096414.jpg", "caption": "a baseball game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227490.jpg", "caption": "an orange and black train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489666.jpg", "caption": "a group of people walking on a snowy slope", "annotations": [{"polygon": [[154, 326], [219, 329], [223, 356], [155, 354]], "text": "BIGAPP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIGAPP", "recog_valid": true, "glyph_recog_text": "BIGAPP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358599.jpg", "caption": "a large air force plane parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358607.jpg", "caption": "a clock on a tree in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358608.jpg", "caption": "a small plane flying over a field with other planes", "annotations": [{"polygon": [[151, 221], [153, 220], [156, 237], [181, 227], [178, 206]], "text": "PULPIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PULPIT", "recog_valid": true, "glyph_recog_text": "T", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358616.jpg", "caption": "a white and red bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227568.jpg", "caption": "a close up of a cell phone with a text message", "annotations": [{"polygon": [[123, 262], [142, 244], [132, 231], [112, 248], [114, 254]], "text": "tm2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "tm2", "recog_valid": true, "glyph_recog_text": "tn2", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[136, 279], [168, 249], [157, 238], [125, 269]], "text": "Metro", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hetro", "recog_valid": false, "glyph_recog_text": "Metra", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[171, 317], [162, 308], [190, 283], [198, 295]], "text": "Enter", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Enter", "recog_valid": true, "glyph_recog_text": "Enter", "glyph_recog_ld": 1.0}, {"polygon": [[295, 327], [303, 335], [328, 308], [320, 301]], "text": "grand", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "grand", "recog_valid": true, "glyph_recog_text": "园中品", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[217, 358], [223, 368], [250, 343], [240, 337]], "text": "Start", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Start", "recog_valid": true, "glyph_recog_text": "H1grt", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[186, 331], [193, 339], [220, 318], [237, 299], [230, 293], [218, 302]], "text": "Examples:", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Examples", "recog_valid": false, "glyph_recog_text": "Examplers:", "glyph_recog_ld": 0.8000001999998}, {"polygon": [[178, 322], [182, 329], [219, 295], [219, 293], [213, 288], [190, 305], [183, 310]], "text": "address", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "address", "recog_valid": true, "glyph_recog_text": "acldress", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[230, 270], [237, 277], [279, 239], [279, 238], [275, 232], [269, 232]], "text": "landmark.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "landmark", "recog_valid": false, "glyph_recog_text": "larudraar.", "glyph_recog_ld": 0.5000004999995}, {"polygon": [[209, 267], [216, 276], [274, 222], [267, 216], [254, 225]], "text": "landmark.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "intersection", "recog_valid": false, "glyph_recog_text": "tandglac", "glyph_recog_ld": 0.08333409722158558}, {"polygon": [[222, 184], [228, 193], [263, 160], [261, 152], [229, 174]], "text": "Planner", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Planner", "recog_valid": true, "glyph_recog_text": "PHknner", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[165, 235], [174, 245], [202, 218], [198, 212], [188, 214]], "text": "Simple", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Simple", "recog_valid": true, "glyph_recog_text": "Bimole", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[146, 222], [154, 228], [169, 216], [204, 181], [202, 174], [186, 183]], "text": "TripMaster", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TripHastor", "recog_valid": false, "glyph_recog_text": "TyioM号53号", "glyph_recog_ld": 0.20000079999919995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358646.jpg", "caption": "two people walking in front of a large plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489721.jpg", "caption": "a traffic sign that is hanging over a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227587.jpg", "caption": "a man holding a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489734.jpg", "caption": "a man holding a wii remote in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489735.jpg", "caption": "a plate with two pieces of toast and a bowl of soup", "annotations": [{"polygon": [[276, 64], [281, 121], [301, 132], [313, 125], [317, 86], [302, 63]], "text": "V8", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Co", "recog_valid": false, "glyph_recog_text": ">ω", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358667.jpg", "caption": "a couch with luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227598.jpg", "caption": "a statue of a bird with a nest on top of a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227612.jpg", "caption": "a group of people on bikes in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358684.jpg", "caption": "a qantas airplane flying in the blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489810.jpg", "caption": "a sandwich with meat and vegetables", "annotations": [{"polygon": [[291, 136], [306, 184], [325, 172], [338, 158], [351, 146], [352, 121], [321, 129]], "text": "Ves", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1", "recog_valid": false, "glyph_recog_text": "Ves", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227683.jpg", "caption": "two people skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358756.jpg", "caption": "a black and white photo of a plane flying over a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358788.jpg", "caption": "a plate with a bunch of donuts on it", "annotations": [{"polygon": [[120, 25], [120, 51], [265, 48], [265, 20]], "text": "PETITPLAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Petitplat", "recog_valid": false, "glyph_recog_text": "PETITPLAT", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[276, 22], [276, 22], [277, 49], [277, 66], [317, 64], [317, 31]], "text": "by", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "oy", "recog_valid": false, "glyph_recog_text": "b", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227723.jpg", "caption": "a little girl in a birthday hat eating cake", "annotations": [{"polygon": [[316, 90], [330, 81], [366, 77], [367, 103], [351, 105], [333, 113], [325, 121]], "text": "TUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "10R", "recog_valid": false, "glyph_recog_text": "TUR", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[396, 69], [404, 93], [407, 92], [425, 89], [434, 87], [459, 98], [463, 102], [466, 77], [456, 66], [430, 62]], "text": "ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "ONTD", "recog_valid": false, "glyph_recog_text": "ON", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227724.jpg", "caption": "a street sign is on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227743.jpg", "caption": "several people sitting at a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489890.jpg", "caption": "a large white and orange moving truck parked in a field", "annotations": [{"polygon": [[186, 195], [187, 210], [188, 212], [278, 190], [268, 173]], "text": "U'HAUL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UHAUL", "recog_valid": false, "glyph_recog_text": "U'HAUL", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[334, 183], [336, 204], [394, 220], [390, 202]], "text": "UHAUL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UHAUL", "recog_valid": true, "glyph_recog_text": "UHAUL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489914.jpg", "caption": "wall street, new york city, usa", "annotations": [{"polygon": [[284, 291], [283, 323], [443, 281], [451, 244]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROADWAY", "recog_valid": true, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 1.0}, {"polygon": [[406, 120], [408, 162], [481, 240], [482, 231], [472, 189]], "text": "WALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "WAILL", "recog_valid": false, "glyph_recog_text": "WALL", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489961.jpg", "caption": "a one way sign on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096745.jpg", "caption": "a man riding a horse in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096748.jpg", "caption": "a man talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000489967.jpg", "caption": "a traffic light is on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227825.jpg", "caption": "a man walking down the street with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358904.jpg", "caption": "a view of a ski slope from a window", "annotations": [{"polygon": [[345, 167], [368, 162], [360, 345], [335, 345]], "text": "HEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CV3", "recog_valid": false, "glyph_recog_text": "工山<口", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000358921.jpg", "caption": "a woman sitting at a table with a plate of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096790.jpg", "caption": "a motorcycle with two bags on the back and a red and white sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227869.jpg", "caption": "a man and woman cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096826.jpg", "caption": "a green bus is parked next to a pink bus", "annotations": [{"polygon": [[333, 109], [336, 144], [489, 119], [487, 78]], "text": "Sainsbury's", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Sainsbury's", "recog_valid": true, "glyph_recog_text": "Sainsbury's", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227903.jpg", "caption": "a baseball player standing on a field with a glove", "annotations": [{"polygon": [[63, 245], [96, 246], [99, 280], [63, 281]], "text": "Right", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Right", "recog_valid": true, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[98, 246], [139, 247], [137, 277], [99, 277]], "text": "Store.", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Store", "recog_valid": false, "glyph_recog_text": "Stare.", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[142, 245], [177, 246], [180, 280], [143, 281]], "text": "Right", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Right", "recog_valid": true, "glyph_recog_text": "F", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[177, 246], [218, 245], [215, 277], [180, 277]], "text": "Price.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Price", "recog_valid": false, "glyph_recog_text": "Price.", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[126, 131], [126, 131], [135, 131], [140, 163], [176, 163], [175, 196], [157, 198], [152, 226], [133, 227], [131, 201], [108, 200], [104, 152], [114, 143]], "text": "Kroger", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "Kroge", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[349, 185], [350, 203], [319, 220], [313, 214], [308, 200]], "text": "Norfolk", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Boas", "recog_valid": false, "glyph_recog_text": "Norfoik", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[510, 102], [511, 275], [403, 293], [328, 108]], "text": "Bojar", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "W", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096854.jpg", "caption": "a man is putting hot dogs on a grill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227948.jpg", "caption": "a skateboarder riding a ramp at a skate park", "annotations": [{"polygon": [[254, 261], [230, 202], [352, 173], [367, 222]], "text": "ASK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASK", "recog_valid": true, "glyph_recog_text": "ASK", "glyph_recog_ld": 1.0}, {"polygon": [[146, 395], [260, 512], [347, 512], [365, 488], [196, 371]], "text": "WORS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ORO", "recog_valid": false, "glyph_recog_text": "WORS", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[105, 383], [117, 406], [158, 386], [148, 369]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "12", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[108, 356], [105, 319], [146, 319], [150, 344]], "text": "EAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "多", "recog_valid": false, "glyph_recog_text": "EAD", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359032.jpg", "caption": "a group of men sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490117.jpg", "caption": "a cake with a picture of a cat on it", "annotations": [{"polygon": [[135, 208], [182, 237], [171, 243], [161, 253], [117, 232], [125, 222], [121, 218], [124, 217], [128, 219]], "text": "HEPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEPPY", "recog_valid": true, "glyph_recog_text": "HEPPY", "glyph_recog_ld": 1.0}, {"polygon": [[188, 238], [195, 239], [202, 247], [204, 250], [211, 249], [220, 251], [236, 264], [227, 272], [223, 267], [215, 278], [176, 261], [179, 256], [181, 256], [192, 244], [192, 242], [187, 242]], "text": "BERF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BERF", "recog_valid": true, "glyph_recog_text": "BERF", "glyph_recog_ld": 1.0}, {"polygon": [[227, 280], [241, 263], [247, 267], [262, 272], [274, 278], [275, 279], [266, 288], [261, 300], [227, 285], [226, 283]], "text": "DAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DAY", "recog_valid": true, "glyph_recog_text": "DAY", "glyph_recog_ld": 1.0}, {"polygon": [[132, 269], [142, 260], [147, 250], [164, 259], [185, 274], [212, 283], [215, 286], [209, 313], [181, 297], [141, 278], [133, 272]], "text": "MARISA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARISA", "recog_valid": true, "glyph_recog_text": "MARISA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227975.jpg", "caption": "a group of young boys sitting on a bench", "annotations": [{"polygon": [[400, 245], [400, 245], [394, 255], [394, 255], [398, 283], [398, 283], [408, 291], [408, 291], [423, 290], [423, 290], [428, 280], [428, 280], [422, 251], [422, 251], [417, 243], [417, 243]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "0)", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[214, 261], [209, 292], [216, 301], [231, 303], [240, 297], [245, 264], [238, 257], [223, 255]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "00", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[95, 248], [98, 298], [109, 298], [127, 261], [125, 250]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "上", "recog_valid": false, "glyph_recog_text": "卜", "glyph_recog_ld": 1.0}, {"polygon": [[4, 256], [1, 294], [9, 304], [26, 304], [34, 299], [36, 283], [38, 262], [31, 254], [14, 253]], "text": "6", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000227982.jpg", "caption": "a red and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490126.jpg", "caption": "a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359070.jpg", "caption": "a bag of items laid out on a newspaper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359099.jpg", "caption": "a man riding skis down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359106.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[164, 228], [150, 316], [396, 317], [395, 227]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[215, 319], [374, 320], [354, 392], [278, 401], [211, 395]], "text": "ME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ME", "recog_valid": true, "glyph_recog_text": "ME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228035.jpg", "caption": "a bathroom with a toilet and a blue handrail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490182.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096984.jpg", "caption": "a white teddy bear with a green bow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000096986.jpg", "caption": "a black and blue exercise bag sitting on the floor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359132.jpg", "caption": "a large clock on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359139.jpg", "caption": "a woman pouring wine into a glass at an outdoor event", "annotations": [{"polygon": [[320, 419], [364, 411], [368, 433], [325, 441]], "text": "VIEJA'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "VIEIAS", "recog_valid": false, "glyph_recog_text": "VIEJAS", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359147.jpg", "caption": "a man riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000097023.jpg", "caption": "people standing on a street corner in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228098.jpg", "caption": "a man standing next to a model airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490268.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359202.jpg", "caption": "a skateboarder is doing a trick on a ramp", "annotations": [{"polygon": [[175, 261], [173, 319], [212, 316], [213, 265]], "text": "ride", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "22", "recog_valid": false, "glyph_recog_text": "-.", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[0, 241], [0, 271], [47, 267], [59, 234]], "text": "SA", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SA", "recog_valid": true, "glyph_recog_text": "SA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490291.jpg", "caption": "a woman in red pants and a green jacket holding a large wooden toy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490347.jpg", "caption": "a skateboarder doing a trick on a ramp at a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490385.jpg", "caption": "a display of donuts in a bakery with a sign that says new york fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228260.jpg", "caption": "a desk with a clock, keyboard, and other items", "annotations": [{"polygon": [[505, 245], [442, 280], [450, 290], [514, 252]], "text": "THRIFTWA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "VM1318H1", "recog_valid": false, "glyph_recog_text": "THRIFTWA", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000097194.jpg", "caption": "a yellow fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359354.jpg", "caption": "a man is putting pizza in a pizza oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490434.jpg", "caption": "a large jetliner on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228293.jpg", "caption": "a person holding a remote control in their hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490443.jpg", "caption": "a red umbrella and chair on a sandy beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228326.jpg", "caption": "girls soccer team in action on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359403.jpg", "caption": "a baseball player swinging a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490496.jpg", "caption": "a santa clause in the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490507.jpg", "caption": "three cats are looking at a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000097296.jpg", "caption": "a baseball player is throwing a ball to the batter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490518.jpg", "caption": "an old postcard of a clock tower and beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490541.jpg", "caption": "a young boy holding a tennis racket in a gymnasium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228398.jpg", "caption": "a group of people standing next to a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359470.jpg", "caption": "a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228409.jpg", "caption": "a motorcycle rider is riding on a road", "annotations": [{"polygon": [[225, 402], [274, 391], [276, 375], [265, 368], [252, 369], [213, 379], [209, 396]], "text": "aprilia", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aprilia", "recog_valid": true, "glyph_recog_text": "aprilia", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000097363.jpg", "caption": "a stop sign sitting on a grassy area next to a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490579.jpg", "caption": "a stop sign with a sign that says alto 4 alto", "annotations": [{"polygon": [[81, 269], [191, 272], [192, 326], [78, 324]], "text": "ALTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALTO", "recog_valid": true, "glyph_recog_text": "ALTO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359521.jpg", "caption": "peanut butter banana oatmeal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490598.jpg", "caption": "a clock on a fence", "annotations": [{"polygon": [[186, 189], [203, 204], [209, 198], [219, 188], [233, 180], [251, 178], [268, 180], [284, 187], [300, 200], [303, 203], [320, 186], [314, 178], [293, 163], [273, 156], [254, 152], [232, 155], [217, 159], [206, 165], [194, 174], [186, 188]], "text": "ORIOLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cibiee", "recog_valid": false, "glyph_recog_text": "ORIOLES", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[366, 312], [364, 363], [485, 370], [487, 367], [491, 318]], "text": "SUN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SUN", "recog_valid": true, "glyph_recog_text": "SUN", "glyph_recog_ld": 1.0}, {"polygon": [[18, 299], [17, 351], [152, 356], [154, 341], [150, 304]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "THF", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228461.jpg", "caption": "a red fire hydrant sitting on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359565.jpg", "caption": "a vase with a flower in it", "annotations": [{"polygon": [[114, 73], [140, 76], [136, 65], [142, 56], [141, 46], [134, 41], [115, 37], [112, 72]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "在", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359567.jpg", "caption": "a magazine cover with a cake made out of apples and cream", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359568.jpg", "caption": "a man and a woman are hanging from a traffic light pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490665.jpg", "caption": "a display of bananas in a store", "annotations": [{"polygon": [[132, 13], [208, 32], [207, 69], [132, 56]], "text": "Chiquita", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Chiquita", "recog_valid": true, "glyph_recog_text": "Chiquite", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228522.jpg", "caption": "a group of red double decker buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228527.jpg", "caption": "a rusty metal pole with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228539.jpg", "caption": "a person riding a motorcycle down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359688.jpg", "caption": "a person sitting at a desk with a plate of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228618.jpg", "caption": "1951 chevrolet 3100 cc-131409 for sale in california", "annotations": [{"polygon": [[208, 246], [403, 207], [402, 228], [209, 272]], "text": "CHEVROLET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHEVROLET", "recog_valid": true, "glyph_recog_text": "CHEVROLET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359695.jpg", "caption": "a fire hydrant is being used to fill a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228624.jpg", "caption": "a man sitting on a ledge with a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359701.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359704.jpg", "caption": "a bench sitting in a park surrounded by flowers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359707.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000490801.jpg", "caption": "a woman and two children are eating cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000359746.jpg", "caption": "a baseball game", "annotations": [{"polygon": [[320, 208], [318, 238], [360, 240], [360, 210]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "25", "recog_valid": true, "glyph_recog_text": "25", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000228678.jpg", "caption": "a street sign and a sign for a bank", "annotations": [{"polygon": [[336, 151], [345, 324], [368, 323], [353, 151]], "text": "THE ELIZABETH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A--N", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[217, 243], [288, 213], [284, 264], [213, 286]], "text": "TRAFFIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AFFC", "recog_valid": false, "glyph_recog_text": "TRAFFIC", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[209, 453], [244, 455], [241, 488], [207, 483]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Aeo3", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365772.jpg", "caption": "a man and a woman are playing baseball in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234720.jpg", "caption": "a group of people dressed as bears riding on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496874.jpg", "caption": "1961 chevrolet c10 cc-131209 for sale in oregon, washington", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103676.jpg", "caption": "a microwave with a toy stormtrooper on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496902.jpg", "caption": "a boy and girl at a birthday party", "annotations": [{"polygon": [[106, 283], [80, 299], [74, 284], [97, 269]], "text": "new", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "rhew", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365855.jpg", "caption": "a bathroom with a shower and toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365858.jpg", "caption": "a pizza with a half eaten slice and a coke", "annotations": [{"polygon": [[360, 132], [391, 160], [380, 274], [352, 256]], "text": "CocalCola", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9ogna", "recog_valid": false, "glyph_recog_text": "oo", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496933.jpg", "caption": "a large airplane sitting on the tarmac at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103722.jpg", "caption": "a woman in pink surfing outfit riding a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496939.jpg", "caption": "two motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365879.jpg", "caption": "a desk with a laptop, a notebook, a pen, a notepad, a phone, a pen, a notepad, a phone, a pen,", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234839.jpg", "caption": "a sign has texts on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365927.jpg", "caption": "a large kitchen with a sink and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365946.jpg", "caption": "two men playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234883.jpg", "caption": "a street sign with a green light on it", "annotations": [{"polygon": [[304, 298], [302, 320], [311, 321], [326, 319], [345, 318], [360, 314], [377, 314], [393, 312], [407, 310], [403, 286], [383, 289], [366, 292], [356, 293], [344, 295], [332, 297], [325, 296]], "text": "GOLDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOLDEN", "recog_valid": true, "glyph_recog_text": "GOLDEN", "glyph_recog_ld": 1.0}, {"polygon": [[425, 282], [423, 297], [425, 306], [435, 307], [447, 305], [462, 303], [474, 302], [492, 299], [495, 298], [495, 295], [491, 287], [491, 283], [491, 275], [482, 276], [470, 277], [457, 279], [450, 280]], "text": "GATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GATE", "recog_valid": true, "glyph_recog_text": "GATE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103812.jpg", "caption": "blackberry bold 9900", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365958.jpg", "caption": "a man on skis is walking down a snowy path", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234900.jpg", "caption": "a man standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234916.jpg", "caption": "a man cutting a cake with a young boy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366009.jpg", "caption": "a group of people sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366021.jpg", "caption": "a car parked in a parking lot at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366048.jpg", "caption": "a man is standing next to a bus with a man in a vest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497123.jpg", "caption": "a sign on a street that says no bottles or glass on street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366058.jpg", "caption": "a train is seen in the rear view mirror of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366061.jpg", "caption": "a teddy bear sitting on a small table", "annotations": [{"polygon": [[300, 288], [287, 303], [293, 310], [297, 313], [326, 331], [335, 318]], "text": "Harrods", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hawwed", "recog_valid": false, "glyph_recog_text": "Harrods", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234994.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103935.jpg", "caption": "a skier in the air on a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103936.jpg", "caption": "a reflection of a bus window", "annotations": [{"polygon": [[290, 253], [290, 253], [290, 284], [404, 284], [402, 255]], "text": "edgware", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Edgware", "recog_valid": false, "glyph_recog_text": "edgware", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366086.jpg", "caption": "a stop sign and a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103966.jpg", "caption": "a tennis racket and balls on a blue court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366146.jpg", "caption": "a group of people standing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366148.jpg", "caption": "two wooden benches sitting next to a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366150.jpg", "caption": "a man sitting on a rock talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104021.jpg", "caption": "a woman is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104044.jpg", "caption": "an emirates airplane parked on the tarmac", "annotations": [{"polygon": [[216, 254], [219, 228], [228, 206], [287, 212], [275, 265]], "text": "Emirates ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cnaes", "recog_valid": false, "glyph_recog_text": "Eeriatas", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497261.jpg", "caption": "a baseball game with a batter at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497274.jpg", "caption": "a truck driving down the street with a bus behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497320.jpg", "caption": "a cat laying on a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366257.jpg", "caption": "a small plane parked on the water near a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366262.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104124.jpg", "caption": "a black and white photo of a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235217.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235245.jpg", "caption": "a baseball player is trying to catch a ball", "annotations": [{"polygon": [[512, 292], [462, 316], [448, 246], [460, 228], [504, 224]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "~", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[210, 101], [197, 120], [206, 137], [217, 144], [231, 115]], "text": "MB", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "nMg", "recog_valid": false, "glyph_recog_text": "皇", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104174.jpg", "caption": "two women playing beach volleyball on a sandy beach", "annotations": [{"polygon": [[108, 110], [95, 188], [85, 210], [62, 201], [82, 94]], "text": "bright", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "bright", "recog_valid": true, "glyph_recog_text": "1y61q", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235276.jpg", "caption": "a large military plane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366349.jpg", "caption": "a teddy bear sitting on a bed with a book", "annotations": [{"polygon": [[135, 285], [168, 251], [179, 248], [222, 213], [235, 231], [152, 299]], "text": "Drupal", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "jednJc", "recog_valid": false, "glyph_recog_text": "Drupal", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[177, 302], [215, 269], [338, 407], [310, 438], [304, 445], [261, 390], [246, 389], [239, 381], [242, 364]], "text": "Drupal", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Drupal", "recog_valid": true, "glyph_recog_text": "Drupal", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497440.jpg", "caption": "a hot dog with peppers and onions", "annotations": [{"polygon": [[383, 352], [380, 141], [335, 137], [352, 338]], "text": "SREDIENHC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CRECIEMKPY", "recog_valid": false, "glyph_recog_text": "nrwa_w", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235299.jpg", "caption": "a woman sitting on the floor with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366382.jpg", "caption": "a vintage kenney's biscuit box and a suitcase", "annotations": [{"polygon": [[438, 134], [298, 117], [294, 142], [434, 159]], "text": "SCUIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SCUIT", "recog_valid": true, "glyph_recog_text": "SCUIT", "glyph_recog_ld": 1.0}, {"polygon": [[216, 336], [216, 351], [409, 381], [413, 366]], "text": "BISCUIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BISCUIT", "recog_valid": true, "glyph_recog_text": "BISCVIT", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366405.jpg", "caption": "a street sign and a fire hydrant on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366408.jpg", "caption": "a breakfast sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104265.jpg", "caption": "two laptops are sitting on a desk next to a lamp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366414.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497494.jpg", "caption": "two men sitting in bed with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366430.jpg", "caption": "a man and woman riding a motorcycle down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366435.jpg", "caption": "a breakfast of ham and melon on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497519.jpg", "caption": "a baseball player is swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497528.jpg", "caption": "a white oven with a basket full of dishes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104325.jpg", "caption": "a group of people on horses with a banner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104328.jpg", "caption": "a baseball game with a batter and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104368.jpg", "caption": "a group of people sitting around a table eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235446.jpg", "caption": "a dog sitting on the sidewalk in front of a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104384.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366528.jpg", "caption": "a man wearing a hat and glasses", "annotations": [{"polygon": [[171, 81], [172, 88], [197, 73], [220, 68], [224, 57], [202, 64], [188, 68]], "text": "WAVES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "AANES", "recog_valid": false, "glyph_recog_text": "WAVES", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497608.jpg", "caption": "a bird perched on a sign", "annotations": [{"polygon": [[344, 259], [162, 232], [174, 193], [349, 220]], "text": "SPEED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPEED", "recog_valid": true, "glyph_recog_text": "SPEED", "glyph_recog_ld": 1.0}, {"polygon": [[175, 292], [306, 311], [324, 274], [181, 252]], "text": "LIMIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LIMIT", "recog_valid": true, "glyph_recog_text": "LIMIT", "glyph_recog_ld": 1.0}, {"polygon": [[140, 404], [311, 432], [338, 330], [155, 305]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104395.jpg", "caption": "a bottle of liquor and an orange on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235471.jpg", "caption": "a red and white building with a sailboat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497617.jpg", "caption": "a bus is parked on a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235473.jpg", "caption": "a red fire hydrant sitting in the grass next to a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497622.jpg", "caption": "a fighter jet on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497625.jpg", "caption": "a man in a wet suit is standing in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104432.jpg", "caption": "a birthday cake with a circus theme on top", "annotations": [{"polygon": [[181, 298], [175, 324], [204, 333], [220, 336], [237, 338], [254, 339], [266, 339], [280, 339], [292, 338], [309, 338], [318, 338], [324, 311], [309, 311], [297, 313], [282, 312], [255, 309], [228, 307], [207, 305], [198, 305]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRTHDAY", "recog_valid": true, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.0}, {"polygon": [[331, 309], [328, 327], [332, 332], [335, 333], [338, 331], [340, 328], [345, 328], [355, 325], [358, 323], [363, 319], [365, 317], [366, 309], [366, 295], [357, 300], [349, 304]], "text": "JASU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JA", "recog_valid": false, "glyph_recog_text": "JASU", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[378, 228], [406, 281], [408, 281], [414, 276], [383, 224]], "text": "PARTYFAVO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OAVAKIAU", "recog_valid": false, "glyph_recog_text": "265171533", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235529.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[167, 226], [283, 175], [289, 232], [172, 275]], "text": "Stevens", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Stevens", "recog_valid": true, "glyph_recog_text": "Stevens", "glyph_recog_ld": 1.0}, {"polygon": [[295, 182], [322, 170], [327, 215], [295, 225]], "text": "St", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "品", "recog_valid": false, "glyph_recog_text": "0.", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[328, 143], [329, 173], [378, 148], [376, 120]], "text": "3000", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3000", "recog_valid": true, "glyph_recog_text": "3000", "glyph_recog_ld": 1.0}, {"polygon": [[76, 242], [70, 310], [261, 347], [263, 283]], "text": "Beacon", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Beacon", "recog_valid": true, "glyph_recog_text": "Beacon", "glyph_recog_ld": 1.0}, {"polygon": [[280, 351], [285, 309], [350, 325], [347, 368]], "text": "Ave", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ave", "recog_valid": true, "glyph_recog_text": "Ave", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235543.jpg", "caption": "a baby sitting on a table with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104504.jpg", "caption": "a woman in a dress and boots holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497731.jpg", "caption": "a stop sign with the words don't believe written on it", "annotations": [{"polygon": [[85, 166], [208, 186], [217, 146], [177, 132], [81, 116]], "text": "DON'T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DonT", "recog_valid": false, "glyph_recog_text": "DON'T", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[8, 259], [224, 285], [260, 244], [254, 195], [26, 158]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[86, 278], [187, 290], [267, 266], [272, 292], [240, 324], [76, 322]], "text": "BELIEVIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BELievN", "recog_valid": false, "glyph_recog_text": "BELIEVIN", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235591.jpg", "caption": "a young girl sitting on a couch with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366688.jpg", "caption": "a group of young baseball players standing around a trophy", "annotations": [{"polygon": [[274, 272], [268, 297], [294, 313], [302, 286]], "text": "24", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "24", "recog_valid": true, "glyph_recog_text": "24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104550.jpg", "caption": "a railroad crossing sign with a red and white stripe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366695.jpg", "caption": "a bottle of liquid and a pot on a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366734.jpg", "caption": "a blue and yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497807.jpg", "caption": "a kite flying in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497819.jpg", "caption": "a person riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235681.jpg", "caption": "a fire hydrant on the sidewalk", "annotations": [{"polygon": [[434, 329], [416, 341], [426, 355], [458, 381], [481, 370], [475, 356], [452, 338]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CO", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104621.jpg", "caption": "two people riding motorcycles down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104631.jpg", "caption": "a surfboard laying on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497873.jpg", "caption": "a man riding a horse in a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235734.jpg", "caption": "a man is doing a wakeboard trick in the air", "annotations": [{"polygon": [[206, 132], [342, 301], [357, 302], [374, 297], [244, 135], [215, 128]], "text": "OBRIEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OBRIEN", "recog_valid": true, "glyph_recog_text": "OBRIEN", "glyph_recog_ld": 1.0}, {"polygon": [[254, 223], [297, 276], [307, 272], [264, 219]], "text": "BEKER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAKER", "recog_valid": false, "glyph_recog_text": "BEKER", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366812.jpg", "caption": "a banner with a message has texts is flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104711.jpg", "caption": "several motorcycles are on display at an indoor event", "annotations": [{"polygon": [[80, 392], [24, 425], [71, 430], [112, 406]], "text": "18", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "218", "recog_valid": false, "glyph_recog_text": "1 8", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235799.jpg", "caption": "a skateboarder doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235809.jpg", "caption": "a white bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366907.jpg", "caption": "a blue and white bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366927.jpg", "caption": "a crowd of people standing in a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366933.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[189, 127], [224, 94], [225, 109], [194, 141]], "text": "FALLEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "FALLEN", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235876.jpg", "caption": "a group of police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104809.jpg", "caption": "a small clock on a counter next to a mug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235905.jpg", "caption": "a red and white bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104846.jpg", "caption": "a man sitting on the snow with a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366994.jpg", "caption": "a street with a red and white taxi and a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367003.jpg", "caption": "a cat sitting on a box watching a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498082.jpg", "caption": "a woman holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235949.jpg", "caption": "a young boy eating a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235967.jpg", "caption": "a person holding a skateboard with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235976.jpg", "caption": "a group of people standing in front of a bus", "annotations": [{"polygon": [[86, 208], [98, 174], [225, 204], [219, 232]], "text": "SouthDakota", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SatPkon", "recog_valid": false, "glyph_recog_text": "SouthDakota", "glyph_recog_ld": 0.3636369421482344}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367071.jpg", "caption": "three different types of cake on plates", "annotations": [{"polygon": [[424, 215], [378, 290], [453, 330], [494, 245]], "text": "SO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104935.jpg", "caption": "a bus driving down a street with tall buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367085.jpg", "caption": "a little girl eating a piece of pizza", "annotations": [{"polygon": [[221, 384], [238, 292], [241, 275], [242, 257], [243, 256], [251, 256], [255, 258], [254, 264], [262, 260], [264, 259], [284, 258], [285, 262], [287, 277], [285, 288], [284, 299], [271, 368], [271, 376], [270, 382], [263, 387]], "text": "SMILING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JMILING", "recog_valid": false, "glyph_recog_text": "0巨---", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[241, 258], [238, 278], [236, 296], [231, 314], [227, 341], [220, 341], [213, 335], [211, 331], [193, 323], [181, 324], [176, 318], [203, 247], [209, 246], [213, 254], [219, 254], [223, 254], [229, 254], [231, 256], [239, 254]], "text": "DEPT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEPT", "recog_valid": true, "glyph_recog_text": "Ld30", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498160.jpg", "caption": "a man is standing next to a horse on a muddy track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498173.jpg", "caption": "a man in yellow shirt playing tennis on a court", "annotations": [{"polygon": [[53, 107], [160, 108], [161, 138], [54, 136]], "text": "TENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "TENNIS", "recog_valid": true, "glyph_recog_text": "TENNIS", "glyph_recog_ld": 1.0}, {"polygon": [[63, 200], [63, 206], [65, 206], [60, 249], [75, 250], [78, 231], [86, 229], [91, 228], [95, 228], [95, 233], [182, 231], [185, 206], [189, 203], [188, 199]], "text": "PENN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PENN", "recog_valid": true, "glyph_recog_text": "PENN", "glyph_recog_ld": 1.0}, {"polygon": [[74, 256], [69, 268], [68, 281], [76, 283], [93, 283], [101, 280], [104, 272], [107, 269], [197, 263], [202, 232], [88, 235], [80, 238], [75, 249]], "text": "STATE ", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STATE", "recog_valid": false, "glyph_recog_text": "STATE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236036.jpg", "caption": "three baseball players walking on the field", "annotations": [{"polygon": [[6, 257], [10, 282], [74, 275], [69, 249]], "text": "TER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TERS", "recog_valid": false, "glyph_recog_text": "TER", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236052.jpg", "caption": "a person sitting on a bench in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236057.jpg", "caption": "a man and a woman sitting on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236078.jpg", "caption": "a baseball player throwing a pitch on a field", "annotations": [{"polygon": [[223, 223], [233, 196], [276, 204], [268, 228]], "text": "dgers", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dgero", "recog_valid": false, "glyph_recog_text": "dgers", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367171.jpg", "caption": "a young boy standing next to a sign that says sir matt busby way", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236102.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236111.jpg", "caption": "a group of people standing in line at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105047.jpg", "caption": "a man with skis on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236126.jpg", "caption": "a laptop computer sitting on a table", "annotations": [{"polygon": [[304, 216], [303, 240], [329, 234], [324, 253], [332, 272], [335, 231], [332, 212]], "text": "er", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "L", "recog_valid": false, "glyph_recog_text": "①", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236138.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105066.jpg", "caption": "watermelon, bananas, and other fruits are displayed at a market", "annotations": [{"polygon": [[328, 267], [411, 272], [417, 368], [330, 369]], "text": "89", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "89", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[306, 123], [337, 128], [337, 170], [309, 173]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LC", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236141.jpg", "caption": "a person riding a snowboard down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367222.jpg", "caption": "a red and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105096.jpg", "caption": "alaska airlines boeing 737-800", "annotations": [{"polygon": [[99, 259], [122, 252], [159, 253], [163, 268], [159, 277], [144, 275], [132, 277], [125, 282], [122, 277], [100, 277], [78, 279], [76, 271], [82, 265]], "text": "Alaska", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "hgri", "recog_valid": false, "glyph_recog_text": "Alaska", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236176.jpg", "caption": "a woman riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236186.jpg", "caption": "a baseball player swinging a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367259.jpg", "caption": "a group of children sitting at a table with an umbrella", "annotations": [{"polygon": [[392, 94], [410, 85], [422, 84], [473, 66], [486, 64], [479, 80], [447, 92], [417, 102], [414, 99], [397, 107]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "BIRTHDAY", "recog_valid": true, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236217.jpg", "caption": "a black and white photo of people walking in the rain", "annotations": [{"polygon": [[41, 170], [40, 200], [174, 198], [174, 171]], "text": "SINCE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SINCE", "recog_valid": true, "glyph_recog_text": "SINCE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236224.jpg", "caption": "a busy street at night with neon signs and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236245.jpg", "caption": "two boys playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367319.jpg", "caption": "miniature donuts and a penny on a table", "annotations": [{"polygon": [[434, 282], [428, 293], [467, 315], [474, 305]], "text": "CENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CENT", "recog_valid": true, "glyph_recog_text": "CENT", "glyph_recog_ld": 1.0}, {"polygon": [[361, 321], [361, 321], [353, 330], [393, 356], [403, 352], [403, 344], [376, 328]], "text": "2009", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2009", "recog_valid": true, "glyph_recog_text": "2009", "glyph_recog_ld": 1.0}, {"polygon": [[351, 353], [366, 379], [382, 392], [418, 407], [455, 407], [444, 390], [423, 390], [404, 384], [392, 376], [375, 363], [367, 348], [358, 345]], "text": "CANADA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CINAOR", "recog_valid": false, "glyph_recog_text": "CANADA", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236260.jpg", "caption": "a chef cutting meat on a wooden cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498406.jpg", "caption": "a baseball player holding a bat in front of a fence", "annotations": [{"polygon": [[371, 256], [417, 250], [410, 215], [367, 220]], "text": "DIMAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DIMAS", "recog_valid": true, "glyph_recog_text": "DAMAS", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[345, 263], [434, 249], [438, 360], [347, 373]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[443, 140], [442, 160], [506, 126], [498, 106]], "text": "TPX", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "S208m", "recog_valid": false, "glyph_recog_text": "TPX", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367335.jpg", "caption": "saab 9-3 1,5 dci turbo", "annotations": [{"polygon": [[154, 184], [247, 195], [245, 217], [156, 207], [149, 203]], "text": "SAAB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAAB", "recog_valid": true, "glyph_recog_text": "SAAB", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105192.jpg", "caption": "a cat walking next to a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498409.jpg", "caption": "a blender sitting on a counter with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236267.jpg", "caption": "an old photo of a bus driving down a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498449.jpg", "caption": "two children in ski gear pose for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236312.jpg", "caption": "a tall clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367409.jpg", "caption": "a man riding a bicycle on a street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236348.jpg", "caption": "a slice of cake with blue frosting and pink frosting", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498493.jpg", "caption": "a black bear walking on a muddy ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367425.jpg", "caption": "a truck with a large container on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236375.jpg", "caption": "a man in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105310.jpg", "caption": "a woman is standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236388.jpg", "caption": "ford f-150 lariat at the 2014 new york auto show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367461.jpg", "caption": "a motorcycle with an orange body on display at a show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236390.jpg", "caption": "a group of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367477.jpg", "caption": "a man standing next to a bike with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498563.jpg", "caption": "a one way sign with a tree in the background", "annotations": [{"polygon": [[260, 232], [344, 236], [350, 275], [264, 270]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[260, 278], [352, 282], [353, 318], [264, 317]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498570.jpg", "caption": "a silver suv with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105374.jpg", "caption": "a man in a suit and tie talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105388.jpg", "caption": "a book on a table with a tablet and other books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367537.jpg", "caption": "a stop sign on a road", "annotations": [{"polygon": [[290, 283], [290, 283], [290, 313], [380, 315], [380, 281], [289, 281]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[264, 267], [272, 278], [290, 268], [309, 263], [326, 261], [327, 248], [327, 248], [298, 253], [283, 258]], "text": "RAILWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAILWAY", "recog_valid": true, "glyph_recog_text": "RAILWAY", "glyph_recog_ld": 1.0}, {"polygon": [[339, 248], [339, 248], [339, 260], [339, 260], [366, 264], [391, 272], [401, 278], [408, 268], [388, 257], [362, 250]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "CROSSING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367538.jpg", "caption": "a group of people holding umbrellas in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105402.jpg", "caption": "a display case filled with various types of pastries", "annotations": [{"polygon": [[248, 50], [263, 49], [321, 67], [325, 80], [252, 80]], "text": "Samosa", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Samosa", "recog_valid": true, "glyph_recog_text": "Samosa", "glyph_recog_ld": 1.0}, {"polygon": [[298, 118], [318, 84], [331, 88], [316, 119]], "text": "10.-", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "它5", "recog_valid": false, "glyph_recog_text": "10..", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236484.jpg", "caption": "pineapples and bananas are displayed in boxes", "annotations": [{"polygon": [[427, 141], [426, 172], [433, 181], [449, 176], [466, 171], [477, 167], [474, 147], [469, 141]], "text": "120", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "81320", "recog_valid": false, "glyph_recog_text": "120", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[240, 144], [240, 174], [265, 168], [265, 162], [274, 162], [274, 156], [270, 150], [257, 146]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[1, 184], [0, 217], [39, 204], [40, 189], [33, 177]], "text": "120", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "iFo", "recog_valid": false, "glyph_recog_text": "120", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105418.jpg", "caption": "two men standing next to a car with an umbrella", "annotations": [{"polygon": [[268, 100], [271, 199], [5, 262], [4, 193]], "text": "UNDAI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNPAI", "recog_valid": false, "glyph_recog_text": "UNDAI", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[235, 412], [210, 441], [-1, 396], [4, 362]], "text": "UNDAI", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "1MDVI", "recog_valid": false, "glyph_recog_text": "UNDAI", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105452.jpg", "caption": "two people standing next to snowmen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367605.jpg", "caption": "a tennis player is swinging his racket at a ball", "annotations": [{"polygon": [[390, 241], [390, 279], [475, 282], [478, 250], [453, 244], [428, 240], [408, 243]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Emirates", "recog_valid": true, "glyph_recog_text": "Emirates", "glyph_recog_ld": 1.0}, {"polygon": [[487, 241], [480, 282], [512, 284], [513, 241]], "text": "Ai", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "<.", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105468.jpg", "caption": "a black and white photo of a boat in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367614.jpg", "caption": "a group of people standing around a luggage carousel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498706.jpg", "caption": "two people on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367641.jpg", "caption": "two people skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105504.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105516.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236593.jpg", "caption": "a bike is parked on a sidewalk next to a red building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236596.jpg", "caption": "a can of mountain dew", "annotations": [{"polygon": [[337, 349], [352, 92], [514, 71], [514, 210]], "text": "Mol", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "白", "recog_valid": false, "glyph_recog_text": "20", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[355, 412], [353, 233], [512, 207], [512, 345]], "text": "De", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "3", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105546.jpg", "caption": "a bulldog drinking water from a bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498792.jpg", "caption": "two sailboats are racing in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105592.jpg", "caption": "a large urn with a carved design on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367763.jpg", "caption": "a group of people standing on top of a vehicle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367767.jpg", "caption": "a yellow fire hydrant on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367776.jpg", "caption": "a man throwing a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105640.jpg", "caption": "a woman walking down a snowy path with a red umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105643.jpg", "caption": "a bowl of food with broccoli, chicken, and orange", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367788.jpg", "caption": "a row of white and blue delivery trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367814.jpg", "caption": "a display of various types of scissors and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367823.jpg", "caption": "a baseball player holding a bat in his hand", "annotations": [{"polygon": [[176, 215], [176, 215], [166, 204], [171, 186], [181, 176], [196, 183], [182, 211]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105685.jpg", "caption": "a group of people standing around a table with a child", "annotations": [{"polygon": [[115, 404], [138, 381], [149, 395], [129, 418]], "text": "EAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "huapy", "recog_valid": false, "glyph_recog_text": "EAT", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[138, 379], [161, 345], [174, 359], [150, 395]], "text": "DOUGH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hreme", "recog_valid": false, "glyph_recog_text": "OOUGH", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[150, 398], [158, 391], [166, 383], [175, 372], [182, 363], [186, 356], [188, 351], [193, 355], [187, 365], [179, 377], [170, 388], [162, 395], [154, 403]], "text": "DOUGHNUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOUGHNUTS", "recog_valid": true, "glyph_recog_text": "CONRARTE", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498943.jpg", "caption": "a woman walking down the street with a bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105733.jpg", "caption": "a plate with a donut and a drink on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367899.jpg", "caption": "a computer monitor sitting on a desk with a keyboard and mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498982.jpg", "caption": "a group of people playing baseball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105768.jpg", "caption": "a table with a bunch of bananas on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367919.jpg", "caption": "a man sitting on a motorcycle next to a gas pump", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105777.jpg", "caption": "a small airplane parked behind a chain link fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367936.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[180, 109], [187, 140], [88, 144], [85, 111]], "text": "ROUP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ROUP", "recog_valid": true, "glyph_recog_text": "ROUP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367961.jpg", "caption": "a silver bus on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105820.jpg", "caption": "a cat sitting on a book shelf in front of a bookcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367969.jpg", "caption": "a person riding a skateboard down a street", "annotations": [{"polygon": [[33, 398], [79, 391], [106, 448], [45, 461]], "text": "LOOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "复", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367982.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236910.jpg", "caption": "a person is skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105877.jpg", "caption": "a woman brushing her hair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368055.jpg", "caption": "a baseball player holding a bat in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499130.jpg", "caption": "a doll with blonde hair holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368060.jpg", "caption": "a man on a motorcycle with a dog on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105936.jpg", "caption": "a woman with a backpack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237009.jpg", "caption": "a purple teddy bear sitting next to a book", "annotations": [{"polygon": [[332, 312], [412, 286], [415, 285], [421, 308], [337, 335]], "text": "BLOOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BLOOD", "recog_valid": true, "glyph_recog_text": "BLOOD", "glyph_recog_ld": 1.0}, {"polygon": [[34, 76], [34, 76], [58, 87], [33, 133], [18, 127]], "text": "Relax", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Relaae", "recog_valid": false, "glyph_recog_text": "xejeg", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[44, 145], [20, 262], [47, 274], [84, 165]], "text": "Tranquality", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Jranqulity", "recog_valid": false, "glyph_recog_text": "jenbue.", "glyph_recog_ld": 0.20000079999919995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499164.jpg", "caption": "a stop sign and a piano on the side of the road", "annotations": [{"polygon": [[130, 128], [173, 121], [178, 126], [178, 132], [177, 152], [128, 154]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237031.jpg", "caption": "a woman laying on a bed with her belongings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237054.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499204.jpg", "caption": "a hot dog and a drink on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237064.jpg", "caption": "a man walking in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105998.jpg", "caption": "beach soccer tournament in the sand", "annotations": [{"polygon": [[60, 224], [60, 241], [178, 223], [180, 211]], "text": "HOLLAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HOLLANACE", "recog_valid": false, "glyph_recog_text": "HOLLAN", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368160.jpg", "caption": "a man wearing a towel", "annotations": [{"polygon": [[290, 361], [332, 349], [339, 374], [297, 385]], "text": "Google", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gofle", "recog_valid": false, "glyph_recog_text": "Google", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499259.jpg", "caption": "a one way sign on a street corner", "annotations": [{"polygon": [[186, 298], [303, 293], [304, 340], [183, 347]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[181, 358], [309, 353], [295, 407], [188, 408]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499267.jpg", "caption": "a car driving by a traffic light with a heart on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368205.jpg", "caption": "a group of cows walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368208.jpg", "caption": "a person holding a cell phone with a screen on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106072.jpg", "caption": "a red car and a motorcycle parked on a driveway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237160.jpg", "caption": "a man in a suit and tie standing in front of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499305.jpg", "caption": "a young girl playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237162.jpg", "caption": "a man on a skateboard doing a trick on a pipe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499307.jpg", "caption": "a baseball game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106100.jpg", "caption": "a cake with the number 25 on it", "annotations": [{"polygon": [[287, 145], [291, 179], [281, 191], [219, 189], [217, 161], [223, 145]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "26", "recog_valid": false, "glyph_recog_text": "25", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237186.jpg", "caption": "a fire truck driving down the road", "annotations": [{"polygon": [[315, 164], [313, 190], [346, 194], [349, 167]], "text": "FIRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIRE", "recog_valid": true, "glyph_recog_text": "FIRE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106119.jpg", "caption": "a baseball player throwing a pitch at a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237201.jpg", "caption": "1949 light duty models ad", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237203.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[86, 86], [149, 85], [149, 139], [88, 137]], "text": "Z", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "IZ", "recog_valid": false, "glyph_recog_text": "z", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499353.jpg", "caption": "a building with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106144.jpg", "caption": "a man laying on the floor with his belongings scattered around him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499364.jpg", "caption": "a table with two laptops and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499372.jpg", "caption": "two people are working on a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237233.jpg", "caption": "a girl smiling at a table with a lot of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237245.jpg", "caption": "a woman in a blue sweatshirt", "annotations": [{"polygon": [[254, 362], [259, 344], [264, 341], [285, 355], [289, 362], [285, 383], [267, 371]], "text": "CO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CO.", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499396.jpg", "caption": "a snowboarder in the air", "annotations": [{"polygon": [[106, 230], [220, 298], [248, 280], [140, 215]], "text": "FORUM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20日", "recog_valid": false, "glyph_recog_text": "FORUM", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237279.jpg", "caption": "two cows walking down a road near trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237309.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237315.jpg", "caption": "a blue motorcycle parked next to a wooden building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368421.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[281, 364], [283, 394], [359, 392], [358, 372]], "text": "images", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "c eges", "recog_valid": false, "glyph_recog_text": "images", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237355.jpg", "caption": "a woman on a train talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368440.jpg", "caption": "a large white and orange airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499523.jpg", "caption": "a soccer player is running after the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368468.jpg", "caption": "two trains are on the tracks at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368475.jpg", "caption": "a man is playing tennis on a court with a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106364.jpg", "caption": "a surfer riding a wave in the ocean", "annotations": [{"polygon": [[407, 189], [510, 198], [499, 223], [445, 222], [399, 215]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Ghotogrophy", "recog_valid": false, "glyph_recog_text": "Photography", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237445.jpg", "caption": "a bus with an orange and gold color", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368521.jpg", "caption": "a baseball player throwing a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106382.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106383.jpg", "caption": "a microwave oven with a purple box on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106397.jpg", "caption": "a parking meter with two parking spaces", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237477.jpg", "caption": "a man on a skateboard doing a trick on a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237502.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368577.jpg", "caption": "a little girl eating a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106438.jpg", "caption": "two people holding two different colored cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368603.jpg", "caption": "a black cat sitting on a wooden floor near a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106463.jpg", "caption": "police motorcycles on a street with people and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237538.jpg", "caption": "a boy is watching an air force one plane take off", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237542.jpg", "caption": "air france airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106471.jpg", "caption": "a double decker bus with a rainbow flag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237550.jpg", "caption": "a motorcycle shop with many motorcycles parked inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499697.jpg", "caption": "a rusty sign that says take make bros", "annotations": [{"polygon": [[258, 174], [263, 224], [424, 221], [422, 169]], "text": "TANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAME", "recog_valid": false, "glyph_recog_text": "TANE", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[264, 231], [269, 293], [403, 286], [399, 228]], "text": "MAKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAKE", "recog_valid": true, "glyph_recog_text": "MAKE", "glyph_recog_ld": 1.0}, {"polygon": [[281, 294], [285, 347], [422, 343], [418, 288]], "text": "BROS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROS", "recog_valid": true, "glyph_recog_text": "BROS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368637.jpg", "caption": "a large clock tower with two clocks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499719.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[130, 298], [234, 265], [265, 281], [169, 314]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237587.jpg", "caption": "a fire hydrant in front of a house with a sign has texts", "annotations": [{"polygon": [[226, 21], [218, 47], [189, 38], [201, 7]], "text": "gai", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Jai", "recog_valid": false, "glyph_recog_text": "g", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[229, 17], [226, 46], [267, 61], [271, 36]], "text": "mode", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Tlode", "recog_valid": false, "glyph_recog_text": "mode", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[187, 43], [187, 68], [272, 89], [274, 68]], "text": "BEAUTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SEAUTY", "recog_valid": false, "glyph_recog_text": "BEAUTY", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[193, 75], [193, 99], [269, 114], [268, 94]], "text": "SALON", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SALON", "recog_valid": true, "glyph_recog_text": "SALON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106517.jpg", "caption": "a man and a child on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237608.jpg", "caption": "a black and white photo of a fireplace with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499760.jpg", "caption": "a red car parked on the street", "annotations": [{"polygon": [[382, 398], [384, 408], [440, 432], [444, 425]], "text": "YCKM", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "OYCKM", "recog_valid": false, "glyph_recog_text": "vcso", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499763.jpg", "caption": "a woman holding a surfboard on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106557.jpg", "caption": "a pink container with a spoon and a spoon in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106562.jpg", "caption": "a group of men standing next to an old steam engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368717.jpg", "caption": "a black and red train engine at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237645.jpg", "caption": "a miniature food item on a table", "annotations": [{"polygon": [[72, 286], [75, 367], [24, 375], [29, 298]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LO", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 1.0}, {"polygon": [[75, 281], [75, 302], [138, 287], [138, 268]], "text": "CENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CENT", "recog_valid": true, "glyph_recog_text": "CENT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368736.jpg", "caption": "a clock tower with a cross on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368750.jpg", "caption": "a traffic sign on a highway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499829.jpg", "caption": "a wooden box with stuffed animals and a baby stroller", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106615.jpg", "caption": "a man in a ski suit is standing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368768.jpg", "caption": "a fire hydrant on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106636.jpg", "caption": "two pink bento boxes with vegetables and meat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237718.jpg", "caption": "a street sign with a street name and a street name", "annotations": [{"polygon": [[92, 321], [89, 347], [221, 339], [219, 316]], "text": "Oberon", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Oberon", "recog_valid": true, "glyph_recog_text": "Oberon", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106646.jpg", "caption": "a donut and coffee on a tray next to a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499867.jpg", "caption": "a room with a clock, books, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499871.jpg", "caption": "a row of parking meters with a blue face", "annotations": [{"polygon": [[325, 320], [366, 345], [362, 369], [323, 346]], "text": "1AX8160", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "1AX8160", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237743.jpg", "caption": "umbrellas for sale in the window of a store", "annotations": [{"polygon": [[289, 385], [378, 384], [379, 416], [290, 416]], "text": "balra", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Malra", "recog_valid": false, "glyph_recog_text": "balra", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368818.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106688.jpg", "caption": "1953 british royal enfield t100b, frame no", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499911.jpg", "caption": "a man is looking at a motorcycle at a show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368845.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237798.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368875.jpg", "caption": "a desk with a laptop and a computer on it", "annotations": [{"polygon": [[354, 408], [352, 420], [348, 433], [336, 446], [349, 449], [354, 439], [360, 422], [367, 410]], "text": "ETERNALS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SNKEEIR", "recog_valid": false, "glyph_recog_text": "ETEAALS", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368893.jpg", "caption": "a sign that says i cabbage town on a street", "annotations": [{"polygon": [[184, 145], [216, 145], [213, 186], [187, 188]], "text": "I", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[174, 201], [315, 202], [316, 241], [180, 246], [165, 239], [161, 220], [164, 209]], "text": "CABBAGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CABBAGE", "recog_valid": true, "glyph_recog_text": "CABBAGE", "glyph_recog_ld": 1.0}, {"polygon": [[200, 247], [301, 245], [300, 285], [206, 284]], "text": "TOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOWN", "recog_valid": true, "glyph_recog_text": "TOWN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237831.jpg", "caption": "a woman and a baby sitting at a table", "annotations": [{"polygon": [[321, 366], [296, 391], [318, 403], [343, 377]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237843.jpg", "caption": "a delta airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106772.jpg", "caption": "a young boy stirring food in a pan on the stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237861.jpg", "caption": "a group of girls playing softball in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500005.jpg", "caption": "a woman is swinging a tennis racket at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368956.jpg", "caption": "a group of people skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368969.jpg", "caption": "a baseball player standing on a field with a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106836.jpg", "caption": "a woman and two children sitting at a table with cupcakes", "annotations": [{"polygon": [[353, 173], [353, 190], [389, 190], [407, 186], [438, 179], [434, 160]], "text": "happu", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ha ppy", "recog_valid": false, "glyph_recog_text": "happu", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500057.jpg", "caption": "two little girls eating broccoli and flowers in a pan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369000.jpg", "caption": "a large elephant standing in a dirt field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500076.jpg", "caption": "a group of people riding horses down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106874.jpg", "caption": "a man in red and black ski gear jumping over a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369019.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106875.jpg", "caption": "a wooden clock with a mouse on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369038.jpg", "caption": "a person skiing down a slope with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106902.jpg", "caption": "a kitchen with a refrigerator, microwave, and stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237976.jpg", "caption": "a woman taking a selfie in a bathroom mirror", "annotations": [{"polygon": [[263, 0], [275, 14], [309, 21], [343, 49], [386, 110], [403, 104], [402, 71], [371, 25], [331, 0]], "text": "beautiful", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "poudilve", "recog_valid": false, "glyph_recog_text": "Seautiful", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238004.jpg", "caption": "a boat with people on it in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238006.jpg", "caption": "a sign for a restaurant is on a building", "annotations": [{"polygon": [[333, 143], [333, 165], [407, 184], [408, 162]], "text": "GOMISTERI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOMISTER", "recog_valid": false, "glyph_recog_text": "GOMISTERI", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369078.jpg", "caption": "a baseball player is at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369087.jpg", "caption": "two women playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500165.jpg", "caption": "a bench with a statue of a dog and a kangaroo", "annotations": [{"polygon": [[90, 137], [177, 139], [192, 106], [139, 102]], "text": "BU", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "30", "recog_valid": false, "glyph_recog_text": "B U", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500169.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238048.jpg", "caption": "a man carrying a pizza tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238051.jpg", "caption": "a train is parked at a station with people on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500224.jpg", "caption": "a young boy holding a baseball bat and a catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107011.jpg", "caption": "a police officer is standing in the middle of the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107013.jpg", "caption": "a mirror in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369156.jpg", "caption": "two old military trucks parked on a brick road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500233.jpg", "caption": "a plate of food with rice and vegetables on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238095.jpg", "caption": "a woman riding a horse down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107035.jpg", "caption": "a snowboarder in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238117.jpg", "caption": "a fighter jet is parked on the deck of a ship", "annotations": [{"polygon": [[54, 247], [138, 251], [135, 286], [54, 282]], "text": "0327", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "0327", "recog_valid": true, "glyph_recog_text": "0327", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369207.jpg", "caption": "a white towel with toothbrushes and toothpaste", "annotations": [{"polygon": [[198, 84], [221, 107], [231, 99], [208, 76]], "text": "short", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "short", "recog_valid": true, "glyph_recog_text": "sfort", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369212.jpg", "caption": "a multi tool with a knife, scissors and other tools", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369213.jpg", "caption": "a child laying in bed reading a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238184.jpg", "caption": "a clock on a building with a statue of a man on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107119.jpg", "caption": "a soccer player kicking the ball into the air", "annotations": [{"polygon": [[32, 138], [43, 181], [66, 174], [58, 136]], "text": "14", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "14", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[111, 228], [129, 268], [254, 269], [234, 227]], "text": "peal", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pal", "recog_valid": false, "glyph_recog_text": "peal", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238210.jpg", "caption": "a baseball player in purple and blue uniform", "annotations": [{"polygon": [[161, 270], [186, 342], [323, 307], [306, 261], [233, 261]], "text": "Wildcats", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wetidenty", "recog_valid": false, "glyph_recog_text": "Wildcats", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107144.jpg", "caption": "a plate with meat and a knife on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369295.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369299.jpg", "caption": "two yellow and red sports cars parked on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369333.jpg", "caption": "a group of people skiing down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238286.jpg", "caption": "a laptop computer sitting on a table with a cup of coffee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369362.jpg", "caption": "a giraffe eating a baby", "annotations": [{"polygon": [[27, 273], [57, 259], [57, 284], [27, 298]], "text": "SURF", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SURE", "recog_valid": false, "glyph_recog_text": "SURF", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107225.jpg", "caption": "a billboard for the new york police department is on the side of a building", "annotations": [{"polygon": [[271, 116], [269, 143], [263, 159], [253, 163], [91, 102], [92, 85], [244, 84], [262, 92]], "text": "IZOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S4D", "recog_valid": false, "glyph_recog_text": "IZOD", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[376, 296], [371, 319], [322, 320], [322, 288], [332, 287]], "text": "Dept", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEPT", "recog_valid": false, "glyph_recog_text": "Dept", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[316, 299], [316, 321], [256, 323], [255, 290]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLICE", "recog_valid": true, "glyph_recog_text": "POLICE", "glyph_recog_ld": 1.0}, {"polygon": [[449, 161], [461, 198], [513, 191], [513, 151]], "text": "YA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "YA", "recog_valid": true, "glyph_recog_text": "YA", "glyph_recog_ld": 1.0}, {"polygon": [[169, 296], [169, 325], [251, 323], [252, 291]], "text": "NEW YORK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEWYORK", "recog_valid": false, "glyph_recog_text": "NEW YORK", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107233.jpg", "caption": "a red umbrella with a sign that says chimay", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369397.jpg", "caption": "a plate of food with vegetables and meat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238345.jpg", "caption": "a highway with cars driving on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238352.jpg", "caption": "two books on a table with a plate of food", "annotations": [{"polygon": [[77, 139], [70, 171], [88, 172], [174, 173], [186, 150], [187, 148], [105, 140], [78, 139]], "text": "TWIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TWIST", "recog_valid": true, "glyph_recog_text": "TWIST", "glyph_recog_ld": 1.0}, {"polygon": [[115, 176], [112, 206], [224, 207], [237, 183], [161, 175], [115, 175]], "text": "WRIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WRIST", "recog_valid": true, "glyph_recog_text": "WRIST", "glyph_recog_ld": 1.0}, {"polygon": [[137, 375], [132, 408], [226, 407], [231, 377]], "text": "CODE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CODE", "recog_valid": true, "glyph_recog_text": "CODE", "glyph_recog_ld": 1.0}, {"polygon": [[27, 372], [20, 402], [123, 405], [128, 375]], "text": "KEITH", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "KEITH", "recog_valid": true, "glyph_recog_text": "KEITH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500499.jpg", "caption": "a newspaper on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369438.jpg", "caption": "a stop sign in the middle of nowhere", "annotations": [{"polygon": [[207, 232], [207, 266], [284, 264], [284, 233]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107305.jpg", "caption": "a red building with motorcycles parked outside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500525.jpg", "caption": "a white truck with a white bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238390.jpg", "caption": "a truck is loading a large green dump truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107353.jpg", "caption": "a man and a cow walk down a narrow street in india", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369509.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369516.jpg", "caption": "a bathroom with urinals and a newspaper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369521.jpg", "caption": "a man and his dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369543.jpg", "caption": "a green and white vase with a flower design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107402.jpg", "caption": "a cake with a dog on it", "annotations": [{"polygon": [[205, 314], [267, 268], [340, 429], [284, 458]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "3 0", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500622.jpg", "caption": "a crowd of people watching a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107425.jpg", "caption": "a bike parked next to a metal sculpture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369576.jpg", "caption": "a fire truck with a hose on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238505.jpg", "caption": "a clock on a building at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107436.jpg", "caption": "a cat sitting on a tv watching a tv show", "annotations": [{"polygon": [[152, 139], [151, 156], [196, 171], [196, 161], [195, 156], [167, 143]], "text": "Modern Marves", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MoenNa", "recog_valid": false, "glyph_recog_text": "SNAVPTAPOU", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[110, 53], [110, 78], [134, 94], [137, 102], [140, 102], [142, 81], [134, 67]], "text": "To", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Today", "recog_valid": false, "glyph_recog_text": "T o", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[130, 391], [131, 430], [138, 426], [152, 407], [170, 388], [181, 376], [181, 339]], "text": "CSI:", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CS", "recog_valid": false, "glyph_recog_text": "CSI:", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[110, 289], [110, 307], [112, 311], [116, 310], [120, 307], [126, 306], [132, 302], [140, 299], [140, 278]], "text": "OXGN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OXGN", "recog_valid": true, "glyph_recog_text": "OXGN", "glyph_recog_ld": 1.0}, {"polygon": [[101, 0], [100, 16], [128, 41], [130, 24], [122, 14], [120, 10]], "text": "DALLS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "oalls", "recog_valid": false, "glyph_recog_text": "DALLS", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[126, 0], [164, 45], [165, 29], [155, 13], [144, 0]], "text": "evolution a", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "evoolona", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369603.jpg", "caption": "a black and white photo of a child holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238554.jpg", "caption": "a giraffe standing next to a rock wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500712.jpg", "caption": "a red truck parked next to a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238568.jpg", "caption": "a bride and groom walking in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238589.jpg", "caption": "a man holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107518.jpg", "caption": "two women eating pizza at a street fair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238605.jpg", "caption": "a large white building with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500764.jpg", "caption": "a red couch with pillows and bookshelves in front of a bookshelf", "annotations": [{"polygon": [[443, 314], [504, 337], [511, 316], [450, 298]], "text": "ESTATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ESTAT", "recog_valid": false, "glyph_recog_text": "ESTATE", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369712.jpg", "caption": "a white keyboard and mouse on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107578.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369722.jpg", "caption": "a parking meter is in front of a blue wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369726.jpg", "caption": "a woman is serving food to children at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107586.jpg", "caption": "a food truck parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107623.jpg", "caption": "a cat is sitting in a suitcase", "annotations": [{"polygon": [[79, 144], [123, 115], [126, 120], [85, 149]], "text": "FIFTP VENIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LEEVENUA", "recog_valid": false, "glyph_recog_text": "ifreryouk.", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369775.jpg", "caption": "a man is playing tennis on a tennis court", "annotations": [{"polygon": [[84, 52], [85, 89], [85, 92], [104, 90], [118, 87], [162, 81], [182, 78], [248, 67], [241, 40], [188, 47], [167, 51], [146, 54], [130, 56], [102, 55]], "text": "lerceoes", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "lercec es", "recog_valid": false, "glyph_recog_text": "lerceoes", "glyph_recog_ld": 0.7777780246910837}, {"polygon": [[248, 30], [248, 30], [262, 65], [354, 50], [342, 23], [271, 25]], "text": "-Benz", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Benz", "recog_valid": false, "glyph_recog_text": "-Benz", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238712.jpg", "caption": "two women playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369789.jpg", "caption": "a large grandfather clock in a room with a fireplace", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238717.jpg", "caption": "a laptop, a bag, a pen, a pen holder, a pen, a pen holder, a pen, a pen holder, a pen, a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500877.jpg", "caption": "a banana that has been cut in half", "annotations": [{"polygon": [[110, 118], [125, 122], [142, 124], [157, 122], [172, 118], [183, 114], [186, 137], [171, 141], [160, 144], [150, 144], [139, 144], [124, 142], [113, 140]], "text": "DON'T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DONK", "recog_valid": false, "glyph_recog_text": "DON'T", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[117, 162], [134, 166], [154, 166], [168, 165], [178, 162], [185, 159], [188, 155], [190, 177], [177, 183], [162, 185], [151, 186], [136, 183], [127, 181], [120, 178]], "text": "BELIEVIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Oievi", "recog_valid": false, "glyph_recog_text": "BEUIEVIN", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369805.jpg", "caption": "a row of motorcycles parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107678.jpg", "caption": "a truck with a large white sign on it", "annotations": [{"polygon": [[0, 310], [0, 345], [50, 343], [51, 354], [61, 355], [61, 343], [72, 343], [74, 337], [82, 344], [93, 343], [98, 335], [95, 316], [80, 315], [68, 317], [52, 318], [43, 318], [27, 319], [10, 311]], "text": "Scope", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Scope", "recog_valid": true, "glyph_recog_text": "Scope", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369823.jpg", "caption": "a stop sign on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238757.jpg", "caption": "a group of people in pink shirts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238768.jpg", "caption": "a jar of powder and a flower on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369848.jpg", "caption": "a man sitting at a bar with bottles of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369886.jpg", "caption": "a young boy standing on a skateboard in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238816.jpg", "caption": "a man on a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107766.jpg", "caption": "a table with various food items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238839.jpg", "caption": "a brown bear resting on a log", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501006.jpg", "caption": "a group of people posing with an elephant and holding up a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501015.jpg", "caption": "a man standing in front of a refrigerator with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107838.jpg", "caption": "a large airplane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501085.jpg", "caption": "a tall clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107884.jpg", "caption": "a gas station with motorcycles parked in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370029.jpg", "caption": "a yellow train engine on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238963.jpg", "caption": "a baseball player on the mound", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238968.jpg", "caption": "a clock hanging from the ceiling of a subway station", "annotations": [{"polygon": [[387, 329], [460, 303], [446, 273], [378, 303], [379, 318]], "text": "GREEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GREEN", "recog_valid": true, "glyph_recog_text": "GREEN", "glyph_recog_ld": 1.0}, {"polygon": [[372, 335], [313, 355], [305, 337], [358, 314]], "text": "BETHNAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BETHNAL", "recog_valid": true, "glyph_recog_text": "BETHNAL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370045.jpg", "caption": "a man playing a video game on a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107918.jpg", "caption": "a street sign with a street name on it", "annotations": [{"polygon": [[301, 167], [425, 129], [423, 157], [295, 194]], "text": "KENISTON SQUARE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KENISTON", "recog_valid": false, "glyph_recog_text": "KENISTON SQUARE", "glyph_recog_ld": 0.533333644444237}, {"polygon": [[306, 198], [412, 167], [409, 189], [302, 218]], "text": "SQUARE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SQUARE", "recog_valid": true, "glyph_recog_text": "SQUARE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239001.jpg", "caption": "a cow on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239007.jpg", "caption": "a white stove with two burners and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107941.jpg", "caption": "a sign that says winchester new market", "annotations": [{"polygon": [[203, 152], [217, 113], [229, 107], [266, 104], [305, 112], [318, 158], [282, 177], [240, 178], [211, 160]], "text": "AAA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AA", "recog_valid": false, "glyph_recog_text": "AAA", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[51, 209], [104, 211], [100, 246], [57, 246]], "text": "70", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "70", "recog_valid": true, "glyph_recog_text": "70", "glyph_recog_ld": 1.0}, {"polygon": [[180, 208], [350, 210], [352, 250], [186, 246]], "text": "WINCHE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINCHE", "recog_valid": true, "glyph_recog_text": "WINCHE", "glyph_recog_ld": 1.0}, {"polygon": [[355, 211], [463, 212], [469, 250], [352, 247]], "text": "STER", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STER", "recog_valid": true, "glyph_recog_text": "STER", "glyph_recog_ld": 1.0}, {"polygon": [[38, 261], [105, 262], [93, 301], [40, 298]], "text": "107", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "107", "recog_valid": true, "glyph_recog_text": "107", "glyph_recog_ld": 1.0}, {"polygon": [[177, 261], [274, 262], [266, 299], [178, 299]], "text": "NEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEW", "recog_valid": true, "glyph_recog_text": "NEW", "glyph_recog_ld": 1.0}, {"polygon": [[295, 261], [476, 264], [466, 300], [292, 299]], "text": "MARKET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARKET", "recog_valid": true, "glyph_recog_text": "MARKET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239013.jpg", "caption": "a living room with a couch, coffee table and a table", "annotations": [{"polygon": [[50, 369], [40, 373], [34, 379], [29, 387], [27, 396], [30, 403], [33, 410], [41, 416], [49, 420], [60, 422], [71, 420], [73, 428], [66, 429], [60, 430], [54, 429], [46, 428], [38, 426], [32, 421], [26, 413], [22, 407], [20, 398], [20, 390], [22, 382], [26, 376], [32, 369], [39, 364], [49, 361]], "text": "PHOTOGRAPHY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "装", "recog_valid": false, "glyph_recog_text": "ercearnr", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107959.jpg", "caption": "a man skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239040.jpg", "caption": "a street with a green light and a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370120.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[146, 162], [159, 205], [225, 195], [224, 155]], "text": "Vittel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Vitte", "recog_valid": false, "glyph_recog_text": "Vittel", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[298, 141], [296, 173], [345, 165], [345, 137]], "text": "APOYAHE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "APOY&HE", "recog_valid": false, "glyph_recog_text": "APOYAHE", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370121.jpg", "caption": "a bench sits in front of a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107982.jpg", "caption": "a black cat sitting on top of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370145.jpg", "caption": "three men standing around a bar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370151.jpg", "caption": "a street sign with three different signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370171.jpg", "caption": "a cake with a cell phone on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501257.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108049.jpg", "caption": "a bus driving down a street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108071.jpg", "caption": "a silver train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501299.jpg", "caption": "a man in a suit and tie talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108101.jpg", "caption": "a family poses in front of a sign that says baja vista", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370258.jpg", "caption": "a bicycle is parked next to a pile of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239187.jpg", "caption": "a large airplane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370280.jpg", "caption": "a large jet airplane sitting on the tarmac", "annotations": [{"polygon": [[321, 202], [378, 149], [386, 165], [330, 217]], "text": "Western", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Western", "recog_valid": true, "glyph_recog_text": "Westen", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108170.jpg", "caption": "a man is holding a kite on a roof", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239269.jpg", "caption": "a train on the tracks with a red, white and blue stripe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108210.jpg", "caption": "a lufthansa a380 at the airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108259.jpg", "caption": "a street sign that says center drive", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108287.jpg", "caption": "a bed with brown sheets and pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108301.jpg", "caption": "a woman sitting at a desk in front of a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108321.jpg", "caption": "army soldiers in a truck on the highway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370505.jpg", "caption": "a couple of people standing in a room with three beds", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501576.jpg", "caption": "a man sitting at a table with a plate of food", "annotations": [{"polygon": [[202, 189], [203, 221], [308, 216], [306, 185], [229, 187], [202, 189]], "text": "YORK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YORK", "recog_valid": true, "glyph_recog_text": "YORK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239499.jpg", "caption": "a black and white photo of a baseball player swinging a bat", "annotations": [{"polygon": [[57, 389], [54, 426], [61, 429], [97, 429], [102, 420], [101, 389]], "text": "66", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "66", "recog_valid": true, "glyph_recog_text": "66", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501647.jpg", "caption": "a baseball player swinging his bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239504.jpg", "caption": "a white microwave on top of a white refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108451.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[0, 186], [29, 188], [28, 222], [-1, 221]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "化", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108472.jpg", "caption": "a dog sitting in the back of a truck", "annotations": [{"polygon": [[248, 354], [347, 364], [343, 389], [246, 384]], "text": "Plymouth", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Plymouth", "recog_valid": true, "glyph_recog_text": "Plymouth", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239552.jpg", "caption": "a cow laying down in the dirt near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501698.jpg", "caption": "a woman is standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239559.jpg", "caption": "two trucks parked in a field with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370634.jpg", "caption": "a black truck parked in a parking lot", "annotations": [{"polygon": [[434, 131], [435, 144], [470, 140], [471, 113], [435, 117]], "text": "wok", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "uok", "recog_valid": false, "glyph_recog_text": "wok", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108492.jpg", "caption": "a skateboarder doing a trick on a ramp", "annotations": [{"polygon": [[56, 299], [156, 300], [156, 331], [55, 331]], "text": "GO211.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GO211", "recog_valid": false, "glyph_recog_text": "GO211.com", "glyph_recog_ld": 0.5555560493821674}, {"polygon": [[193, 299], [277, 303], [273, 334], [190, 331]], "text": "G0211.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "G0211", "recog_valid": false, "glyph_recog_text": "G0211.com", "glyph_recog_ld": 0.5555560493821674}, {"polygon": [[305, 304], [376, 308], [374, 323], [369, 335], [368, 340], [344, 339], [306, 336], [303, 328]], "text": "GO211.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C20", "recog_valid": false, "glyph_recog_text": "GO211.com", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[396, 310], [422, 313], [479, 314], [477, 348], [432, 350], [386, 341], [386, 333]], "text": "GO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GO:", "recog_valid": false, "glyph_recog_text": "GO", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108500.jpg", "caption": "a truck driving down a street in an asian city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370650.jpg", "caption": "a man on a skateboard doing a trick", "annotations": [{"polygon": [[85, 412], [199, 395], [208, 428], [85, 453]], "text": "ONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ONY", "recog_valid": true, "glyph_recog_text": "ONY", "glyph_recog_ld": 1.0}, {"polygon": [[138, 449], [149, 469], [211, 453], [209, 437]], "text": "believe", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "belleve", "recog_valid": false, "glyph_recog_text": "belleve", "glyph_recog_ld": 1.0}, {"polygon": [[85, 467], [90, 480], [137, 469], [129, 449]], "text": "nake.", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "nake", "recog_valid": false, "glyph_recog_text": "nake.", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239580.jpg", "caption": "a skateboarder is doing a trick in front of a building", "annotations": [{"polygon": [[103, 449], [105, 472], [143, 453], [142, 438]], "text": "boogaloos", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "loogoits", "recog_valid": false, "glyph_recog_text": "cpogafloos", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370652.jpg", "caption": "a food processor with a bowl of sliced fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239584.jpg", "caption": "a man is working on a bathroom in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239593.jpg", "caption": "a train is on the tracks in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239600.jpg", "caption": "a microwave and a toaster oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108540.jpg", "caption": "a man standing in front of a bus stop with a shopping bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370722.jpg", "caption": "a person riding a dirt bike on a dirt track", "annotations": [{"polygon": [[203, 264], [210, 285], [238, 267], [230, 247]], "text": "81", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "81", "recog_valid": true, "glyph_recog_text": "81", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370727.jpg", "caption": "a man holding a hot dog and a soda", "annotations": [{"polygon": [[146, 307], [164, 299], [185, 293], [218, 288], [217, 320], [203, 320], [176, 325], [154, 335], [148, 339]], "text": "RED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RED", "recog_valid": true, "glyph_recog_text": "RED", "glyph_recog_ld": 1.0}, {"polygon": [[220, 287], [241, 288], [262, 288], [287, 292], [306, 296], [331, 304], [334, 339], [314, 331], [286, 325], [250, 320], [220, 321]], "text": "WINGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINGS", "recog_valid": true, "glyph_recog_text": "WINGS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501801.jpg", "caption": "a green bus with a white front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370738.jpg", "caption": "a city street with buildings and people walking on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370739.jpg", "caption": "a street sign is shown in an infrared image", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239676.jpg", "caption": "a blue bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370760.jpg", "caption": "a building with a clock on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108620.jpg", "caption": "a girl with glasses and a bat holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239693.jpg", "caption": "a woman standing next to a giant teddy bear", "annotations": [{"polygon": [[172, 1], [172, 1], [174, 17], [251, 38], [256, 18], [199, 0], [172, 1]], "text": "TEDDY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TEDDY", "recog_valid": true, "glyph_recog_text": "TEDDY", "glyph_recog_ld": 1.0}, {"polygon": [[263, 20], [262, 42], [325, 61], [327, 61], [328, 44], [263, 19]], "text": "BEARS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BEARS", "recog_valid": true, "glyph_recog_text": "BEARS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501842.jpg", "caption": "a truck driving down a street with cars behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370790.jpg", "caption": "a toy construction site with trucks and other vehicles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501864.jpg", "caption": "a black and white photo of horses pulling a carriage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108649.jpg", "caption": "oakland athletics hat and sunglasses", "annotations": [{"polygon": [[236, 49], [250, 38], [268, 39], [325, 74], [327, 91], [317, 113], [262, 127], [247, 128], [233, 119]], "text": "A's", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "A。", "recog_valid": false, "glyph_recog_text": "A's", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239739.jpg", "caption": "a bird is perched on a sign", "annotations": [{"polygon": [[213, 293], [277, 342], [259, 424], [199, 377]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "ON", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370824.jpg", "caption": "british airways airbus a320-214", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108680.jpg", "caption": "a blue and white train is parked in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239757.jpg", "caption": "a baby boy sitting in a crib with a toy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239771.jpg", "caption": "a bike is parked next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501917.jpg", "caption": "a group of people on stand up paddle boards in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108708.jpg", "caption": "a man playing a video game with a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239792.jpg", "caption": "a boy doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370868.jpg", "caption": "a woman walking with an elephant in a zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239801.jpg", "caption": "a piece of banana bread on a plate with a fork", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239805.jpg", "caption": "a man standing in a living room with a wii controller", "annotations": [{"polygon": [[85, 219], [81, 190], [92, 188], [105, 186], [114, 186], [128, 188], [145, 191], [156, 197], [158, 204], [159, 214], [152, 212], [140, 210], [132, 208], [121, 205], [112, 205], [98, 209]], "text": "palm", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "palfn!", "recog_valid": false, "glyph_recog_text": "palm", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239811.jpg", "caption": "a white bicycle is parked on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501967.jpg", "caption": "a bunk bed with a sleeping bag and luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501971.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239831.jpg", "caption": "a delta airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108762.jpg", "caption": "a woman laying in a hospital bed holding a picture", "annotations": [{"polygon": [[83, 267], [139, 252], [153, 304], [99, 318]], "text": "V", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "V", "recog_valid": true, "glyph_recog_text": "v", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[403, 234], [406, 227], [411, 219], [415, 211], [432, 204], [437, 218], [423, 225], [418, 233], [411, 243]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "gppy", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[412, 249], [413, 239], [420, 231], [426, 226], [439, 221], [456, 218], [463, 217], [461, 228], [445, 230], [436, 231], [426, 237], [420, 244], [418, 258]], "text": "Firement", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "pamene", "recog_valid": false, "glyph_recog_text": "Firement", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239845.jpg", "caption": "a man is surprised by a birthday cake with lit candles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370918.jpg", "caption": "a group of people sitting on the snow", "annotations": [{"polygon": [[0, 302], [3, 315], [43, 302], [72, 291], [68, 279]], "text": "OLDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OLDEN", "recog_valid": true, "glyph_recog_text": "OLDEN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239849.jpg", "caption": "a woman is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239854.jpg", "caption": "a street sign and a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239870.jpg", "caption": "a plane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108826.jpg", "caption": "kiteboarding at the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108841.jpg", "caption": "motorcycles parked in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371015.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239953.jpg", "caption": "a woman in a pink dress holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108896.jpg", "caption": "a red truck with a large sign on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502134.jpg", "caption": "a man dressed as santa claus riding on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240043.jpg", "caption": "a man standing in front of a refrigerator", "annotations": [{"polygon": [[176, 145], [172, 192], [54, 185], [58, 135]], "text": "SCHLAFLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SCHLAFY", "recog_valid": false, "glyph_recog_text": "SCHLAFLY", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502197.jpg", "caption": "a man and woman standing on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240057.jpg", "caption": "two people holding up two samsung galaxy s4 phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240080.jpg", "caption": "a group of brown teddy bears hanging from a string", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109011.jpg", "caption": "a piece of cake on a plate with a spoon", "annotations": [{"polygon": [[164, 148], [163, 161], [104, 191], [99, 181]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAPPY", "recog_valid": true, "glyph_recog_text": "HnKPY", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[180, 143], [183, 154], [234, 135], [289, 125], [290, 113], [236, 124]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRTHDAY", "recog_valid": true, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502232.jpg", "caption": "a glass case with various vases and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502248.jpg", "caption": "a van with a bee on the side is parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109041.jpg", "caption": "a man sitting in a car on the phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502262.jpg", "caption": "two young men in tuxedos posing for a photo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502271.jpg", "caption": "a woman wearing a hat and holding a wrench", "annotations": [{"polygon": [[248, 441], [262, 460], [286, 449], [265, 429]], "text": "T", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "4", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371217.jpg", "caption": "a yellow fire hydrant in front of a large rock formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371222.jpg", "caption": "a bowl of bananas and plums", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502307.jpg", "caption": "a white scooter parked next to a bike rack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502325.jpg", "caption": "baker street tube sign", "annotations": [{"polygon": [[57, 210], [249, 215], [269, 303], [58, 268]], "text": "BAKER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAKER", "recog_valid": true, "glyph_recog_text": "BAKER", "glyph_recog_ld": 1.0}, {"polygon": [[340, 218], [501, 224], [483, 344], [333, 312]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371254.jpg", "caption": "a train has texts on it", "annotations": [{"polygon": [[149, 307], [246, 323], [244, 351], [149, 332]], "text": "INTERCITRS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "iNTERCiT2S", "recog_valid": false, "glyph_recog_text": "INTERCITRS", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[291, 326], [365, 334], [360, 363], [288, 351]], "text": "SNCF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SNCF", "recog_valid": true, "glyph_recog_text": "SNCF", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109119.jpg", "caption": "a woman holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109136.jpg", "caption": "a double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502360.jpg", "caption": "a person is standing on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109145.jpg", "caption": "a kitchen with a black counter top and white cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502362.jpg", "caption": "a man riding a bike on the beach at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371295.jpg", "caption": "a cake with a fire hydrant on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240241.jpg", "caption": "a motorcycle parked in the grass next to a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371317.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[100, 108], [101, 128], [265, 140], [267, 118]], "text": "PARIBAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PARIBAS", "recog_valid": true, "glyph_recog_text": "PARIBAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109174.jpg", "caption": "a person riding a bike on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240247.jpg", "caption": "a living room with a fireplace, a television, and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109177.jpg", "caption": "a black and red steam engine train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502409.jpg", "caption": "a group of people dressed in pink and blue", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371348.jpg", "caption": "leave me by james kennedy", "annotations": [{"polygon": [[63, 426], [64, 350], [267, 350], [267, 426]], "text": "leave", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "leaue", "recog_valid": false, "glyph_recog_text": "leave", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[303, 380], [416, 379], [416, 426], [305, 426]], "text": "me", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "me", "recog_valid": true, "glyph_recog_text": "m e", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502425.jpg", "caption": "a red and white fire hydrant on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240285.jpg", "caption": "a person is cutting a cake with a knife", "annotations": [{"polygon": [[78, 131], [95, 141], [51, 162], [38, 152]], "text": "Mike", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BW", "recog_valid": false, "glyph_recog_text": "Mike", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502441.jpg", "caption": "a bunch of oranges are sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371376.jpg", "caption": "a laptop computer sitting on a table next to a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240331.jpg", "caption": "a child sitting at a table with a hot dog and drink", "annotations": [{"polygon": [[171, 321], [105, 364], [125, 382], [147, 383], [169, 360], [171, 344]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9y", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[170, 272], [171, 285], [159, 296], [132, 307], [109, 310], [98, 303], [100, 295], [145, 287], [169, 272]], "text": "Nathan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "nathan", "recog_valid": false, "glyph_recog_text": "Nathan", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502495.jpg", "caption": "a cat wearing a paper hat", "annotations": [{"polygon": [[298, 96], [279, 139], [265, 139], [255, 134], [244, 137], [230, 140], [222, 149], [209, 148], [206, 153], [166, 150], [195, 85], [239, 83], [238, 94], [246, 91], [257, 96], [263, 97], [271, 86], [282, 87], [293, 92]], "text": "BUEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "308", "recog_valid": false, "glyph_recog_text": "BUEN", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[418, 127], [411, 166], [405, 165], [398, 177], [390, 172], [385, 183], [375, 189], [366, 187], [361, 194], [294, 212], [284, 167], [322, 164], [337, 163], [346, 170], [352, 159], [359, 159], [359, 165], [361, 173], [364, 162], [377, 152], [385, 148], [392, 143]], "text": "MORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "380W", "recog_valid": false, "glyph_recog_text": "MORE", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[353, 210], [390, 190], [390, 222], [346, 240], [345, 232], [352, 229]], "text": "IT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LT", "recog_valid": false, "glyph_recog_text": "IT", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[308, 241], [305, 234], [317, 230], [306, 224], [308, 217], [322, 212], [336, 212], [341, 216], [336, 218], [326, 221], [332, 224], [335, 231], [327, 236], [317, 243]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502554.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[209, 320], [292, 319], [293, 280], [208, 280]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371482.jpg", "caption": "a street light and a sign on a pole", "annotations": [{"polygon": [[2, 117], [30, 114], [29, 161], [0, 162]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "二", "recog_valid": false, "glyph_recog_text": "一-", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[87, 119], [124, 115], [114, 150], [82, 152], [82, 127]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[55, 270], [112, 265], [105, 296], [58, 298]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[1, 276], [44, 273], [43, 300], [1, 305]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240423.jpg", "caption": "a welcome sign for the neighborhood of north highland", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240439.jpg", "caption": "a black and white photo of a steam engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371511.jpg", "caption": "a woman playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371514.jpg", "caption": "a girl in a blue uniform is about to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502588.jpg", "caption": "a southwest airlines plane on the runway", "annotations": [{"polygon": [[46, 192], [55, 183], [118, 256], [107, 265]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SOUTHWEST", "recog_valid": true, "glyph_recog_text": "SOUTHWEST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109414.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240490.jpg", "caption": "a young boy is holding a bat and standing on a tee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371603.jpg", "caption": "several fire trucks parked on a road near trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502680.jpg", "caption": "a dog sitting on the bow of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502712.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109506.jpg", "caption": "a street sign with a meter sign and a pay meter sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109509.jpg", "caption": "a child standing next to a school bus in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240586.jpg", "caption": "a truck with a flatbed trailer parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109524.jpg", "caption": "a boy blowing out candles on a birthday cake", "annotations": [{"polygon": [[168, 202], [163, 221], [152, 228], [124, 223], [129, 196]], "text": "95", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "95", "recog_valid": true, "glyph_recog_text": "95", "glyph_recog_ld": 1.0}, {"polygon": [[232, 202], [232, 210], [216, 220], [170, 210], [158, 197], [166, 182], [197, 188]], "text": "Lightning", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ligktning", "recog_valid": false, "glyph_recog_text": "Lighfning", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[239, 222], [239, 237], [194, 230], [168, 221], [180, 207], [206, 211]], "text": "McQueen", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MeOueen", "recog_valid": false, "glyph_recog_text": "McQueen", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371672.jpg", "caption": "a young boy holding a baseball glove", "annotations": [{"polygon": [[219, 446], [220, 469], [228, 467], [252, 454], [269, 454], [276, 448], [277, 435], [270, 430], [247, 434], [239, 442]], "text": "RAWLINGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Pawings", "recog_valid": false, "glyph_recog_text": "RAWUINGS", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[111, 461], [112, 491], [411, 492], [407, 462]], "text": "ICCSPORTS.COM", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ICICIS PORITS.ICOM", "recog_valid": false, "glyph_recog_text": "ICCSPORTS.COM", "glyph_recog_ld": 0.7222223765431242}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109553.jpg", "caption": "two pizzas on a wooden table with grapes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502772.jpg", "caption": "a plate of grapefruit and a donut on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371706.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240682.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371758.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[171, 186], [158, 244], [181, 264], [384, 262], [373, 186], [219, 182], [187, 178]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[277, 361], [281, 392], [331, 387], [332, 360]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[231, 286], [228, 302], [294, 305], [309, 321], [315, 282]], "text": "JENNY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "guy", "recog_valid": false, "glyph_recog_text": "JENNY", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240689.jpg", "caption": "a young girl flying a kite on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109640.jpg", "caption": "a bus with a sign has texts", "annotations": [{"polygon": [[218, 36], [216, 79], [172, 81], [167, 38]], "text": "177", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "177", "recog_valid": true, "glyph_recog_text": "177", "glyph_recog_ld": 1.0}, {"polygon": [[253, 35], [245, 61], [254, 71], [288, 67], [290, 31]], "text": "JET", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "JET", "recog_valid": true, "glyph_recog_text": "JET", "glyph_recog_ld": 1.0}, {"polygon": [[294, 32], [298, 67], [357, 60], [366, 40], [360, 25]], "text": "PORP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PROP", "recog_valid": false, "glyph_recog_text": "PORP", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[367, 26], [370, 58], [412, 54], [414, 44], [408, 23]], "text": "LAB", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "LAB", "recog_valid": true, "glyph_recog_text": "LAB", "glyph_recog_ld": 1.0}, {"polygon": [[414, 323], [417, 346], [478, 326], [478, 318], [472, 311]], "text": "Metro", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Me tro", "recog_valid": false, "glyph_recog_text": "Metre", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[295, 366], [301, 393], [328, 388], [321, 359]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502860.jpg", "caption": "a coffee mug sitting on a table next to a remote control", "annotations": [{"polygon": [[85, 235], [87, 258], [187, 238], [179, 218]], "text": "now", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TON", "recog_valid": false, "glyph_recog_text": "n a w", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371789.jpg", "caption": "a cat sitting in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502866.jpg", "caption": "a dog is standing on the bow of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371800.jpg", "caption": "a cow with a black and white face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502933.jpg", "caption": "a person holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371863.jpg", "caption": "henry scrupp flowers, san francisco, california", "annotations": [{"polygon": [[134, 179], [130, 210], [251, 212], [253, 188]], "text": "HENRY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HENRY", "recog_valid": true, "glyph_recog_text": "HENRY", "glyph_recog_ld": 1.0}, {"polygon": [[72, 209], [75, 329], [475, 323], [457, 219]], "text": "HOWERS ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3lowers", "recog_valid": false, "glyph_recog_text": "HOWERS", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240802.jpg", "caption": "a street sign is shown next to a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371874.jpg", "caption": "a group of men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109746.jpg", "caption": "a baseball player throwing a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502964.jpg", "caption": "a row of colorful inflatable boats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371923.jpg", "caption": "a man on skis racing another man on skis", "annotations": [{"polygon": [[212, -1], [200, 35], [358, 64], [375, 20], [351, 0]], "text": "phenix", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Jenix", "recog_valid": false, "glyph_recog_text": "phenix", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371928.jpg", "caption": "a banana, orange and juice are on a table", "annotations": [{"polygon": [[218, 128], [218, 158], [317, 160], [318, 130]], "text": "FREE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FREE", "recog_valid": true, "glyph_recog_text": "FREE", "glyph_recog_ld": 1.0}, {"polygon": [[188, 382], [184, 395], [305, 412], [306, 398]], "text": "HANDMADE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HANDMADE", "recog_valid": true, "glyph_recog_text": "HANDMADE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503021.jpg", "caption": "a skateboarder in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371955.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[94, 232], [109, 213], [116, 214], [133, 229], [122, 246], [114, 248], [95, 234]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "15", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240887.jpg", "caption": "a man and a woman are posing for a picture", "annotations": [{"polygon": [[76, 475], [76, 510], [148, 510], [148, 475]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}, {"polygon": [[223, 474], [222, 508], [433, 509], [433, 473]], "text": "DIGITALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DIGITALLY", "recog_valid": true, "glyph_recog_text": "DIGITALLY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240902.jpg", "caption": "a dog sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503061.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503068.jpg", "caption": "a man selling vegetables on a street in pakistan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372003.jpg", "caption": "a man driving a horse drawn carriage down a street", "annotations": [{"polygon": [[19, 158], [78, 146], [78, 185], [18, 193]], "text": "ESOUIRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "ESOURE", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109862.jpg", "caption": "can i surf this is a girl?", "annotations": [{"polygon": [[8, 95], [507, 95], [500, 168], [4, 168]], "text": "SURFING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SURFING", "recog_valid": true, "glyph_recog_text": "SURFING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503091.jpg", "caption": "a small plane sitting on top of a green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240952.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503099.jpg", "caption": "a side car with a man sitting in it", "annotations": [{"polygon": [[39, 166], [103, 114], [96, 83], [73, 85], [34, 111]], "text": "iXi", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Oixi", "recog_valid": false, "glyph_recog_text": "iXi", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503104.jpg", "caption": "a truck with a tiger in the back of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372045.jpg", "caption": "a woman holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109908.jpg", "caption": "a group of police officers riding motorcycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372058.jpg", "caption": "a person holding a banana and a glass of milk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241001.jpg", "caption": "a person on skis standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241003.jpg", "caption": "a room with a tv, a bookcase, and boxes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503149.jpg", "caption": "an old black and white photo of a busy city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109950.jpg", "caption": "a girl in black shirt and black shorts is hitting a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109971.jpg", "caption": "a baseball player is throwing the ball", "annotations": [{"polygon": [[207, 139], [207, 139], [207, 131], [215, 114], [225, 113], [241, 113], [246, 124], [249, 151], [238, 150], [218, 150], [207, 144]], "text": "43", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "43", "recog_valid": true, "glyph_recog_text": "43", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241046.jpg", "caption": "a cat and a bird flying over a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110004.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503250.jpg", "caption": "two pictures of a fire hydrant with decorative designs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372180.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503283.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372219.jpg", "caption": "a boy with a parrot on his shoulder", "annotations": [{"polygon": [[112, 113], [136, 102], [153, 110], [159, 120], [127, 133]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372229.jpg", "caption": "a man holding a dog on his shoulders", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241161.jpg", "caption": "a busy street with many signs and people walking around", "annotations": [{"polygon": [[480, 201], [482, 216], [512, 193], [511, 174]], "text": "OFFICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "099CF", "recog_valid": false, "glyph_recog_text": "OFFIC", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372234.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110105.jpg", "caption": "a group of people on skis and snowboards", "annotations": [{"polygon": [[344, 200], [336, 217], [315, 210], [300, 195], [290, 189], [291, 182], [312, 182]], "text": "one", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "one", "recog_valid": true, "glyph_recog_text": "one", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110111.jpg", "caption": "a display case filled with various types of doughnuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110123.jpg", "caption": "a young boy sitting at a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110136.jpg", "caption": "a man on the ground", "annotations": [{"polygon": [[161, 176], [163, 112], [513, 111], [513, 176]], "text": "AEGON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AEGON", "recog_valid": true, "glyph_recog_text": "AEGON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241221.jpg", "caption": "a group of people riding motorcycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241261.jpg", "caption": "four men in suits and ties standing in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241271.jpg", "caption": "a man sitting at a table with a large pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503421.jpg", "caption": "a man on a skateboard doing a trick on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241281.jpg", "caption": "a bus on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241283.jpg", "caption": "man wearing a mask", "annotations": [{"polygon": [[186, 301], [194, 309], [215, 297], [244, 280], [239, 271], [210, 283]], "text": "Model", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Business", "recog_valid": false, "glyph_recog_text": "赞oael", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[196, 309], [204, 318], [219, 308], [237, 295], [231, 285]], "text": "Model", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Model", "recog_valid": true, "glyph_recog_text": "转ste)", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[205, 318], [211, 328], [232, 312], [252, 302], [272, 291], [265, 280], [244, 291], [225, 301]], "text": "Generation", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Generation", "recog_valid": true, "glyph_recog_text": "Generstion", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110217.jpg", "caption": "a stop sign and a fence with a sign on it", "annotations": [{"polygon": [[317, 246], [366, 246], [405, 244], [428, 246], [428, 268], [411, 289], [318, 291]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372374.jpg", "caption": "a clock is on a pole in front of a tall building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241305.jpg", "caption": "two men playing tennis on a blue court", "annotations": [{"polygon": [[461, 412], [467, 393], [509, 395], [506, 411], [489, 412], [477, 426], [473, 424], [479, 413]], "text": "Rupa", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Rupa", "recog_valid": true, "glyph_recog_text": "Rupa", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110233.jpg", "caption": "a bird is perched on a box with a sign that says 'freshly baked'", "annotations": [{"polygon": [[346, 404], [342, 418], [285, 402], [286, 387], [335, 398]], "text": "Freshly", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Freshly", "recog_valid": true, "glyph_recog_text": "Freshly", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372381.jpg", "caption": "a laptop with a cell phone sitting on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372390.jpg", "caption": "a parking meter is shown in front of a blue car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110251.jpg", "caption": "a playground with a sign that says playground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241329.jpg", "caption": "a man and woman on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372404.jpg", "caption": "a man and woman cutting a cake at a wedding", "annotations": [{"polygon": [[200, 499], [200, 492], [210, 494], [268, 493], [272, 487], [278, 485], [279, 480], [271, 465], [264, 465], [221, 468], [200, 471], [182, 489], [186, 496], [199, 501]], "text": "Heather", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Qleather", "recog_valid": false, "glyph_recog_text": "Heather", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241345.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[269, 211], [269, 211], [320, 207], [321, 175], [259, 177], [260, 199]], "text": "Cub", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cub", "recog_valid": true, "glyph_recog_text": "Cub", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372430.jpg", "caption": "three boys riding skateboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241402.jpg", "caption": "a man riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241422.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[253, 217], [253, 217], [279, 206], [273, 179], [245, 194], [245, 194]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}, {"polygon": [[7, 189], [7, 189], [138, 191], [137, 162], [9, 155]], "text": "Motors", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Metors", "recog_valid": false, "glyph_recog_text": "Motors", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372510.jpg", "caption": "a street sign that says albert camus", "annotations": [{"polygon": [[117, 252], [106, 284], [277, 279], [278, 248], [119, 253]], "text": "ALBERT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALBERT", "recog_valid": true, "glyph_recog_text": "ALBERT", "glyph_recog_ld": 1.0}, {"polygon": [[291, 248], [291, 279], [430, 274], [426, 245], [294, 247]], "text": "CAMUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAMUS", "recog_valid": true, "glyph_recog_text": "CAMUS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503605.jpg", "caption": "a man wearing a striped shirt and a blue and red tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372558.jpg", "caption": "two boats are sitting on the beach", "annotations": [{"polygon": [[318, 248], [351, 235], [356, 242], [358, 254], [322, 265]], "text": "RX60", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RX60", "recog_valid": true, "glyph_recog_text": "RX6O", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241491.jpg", "caption": "a man blowing out candles on a birthday cake", "annotations": [{"polygon": [[111, 117], [116, 188], [279, 200], [319, 203], [318, 111]], "text": "PDX.rb", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PDX.rb", "recog_valid": true, "glyph_recog_text": "PDX.rb", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372583.jpg", "caption": "a man on a snowboard in the snow", "annotations": [{"polygon": [[383, 3], [356, 179], [363, 179], [377, 148], [389, 109], [409, 1]], "text": "AVERIY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "--m", "recog_valid": false, "glyph_recog_text": "AVERIY", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[88, 243], [116, 228], [112, 211], [86, 224], [87, 245]], "text": "VITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "/ITY", "recog_valid": false, "glyph_recog_text": "VITY", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503674.jpg", "caption": "a group of workers are working on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372603.jpg", "caption": "two women walking down the sidewalk", "annotations": [{"polygon": [[153, 11], [151, 35], [184, 61], [186, 37]], "text": "BAKERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "利国", "recog_valid": false, "glyph_recog_text": "BAKERY", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241531.jpg", "caption": "a cow statue in front of a store", "annotations": [{"polygon": [[133, 82], [133, 27], [263, 40], [261, 93]], "text": "ROXY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ROXY", "recog_valid": true, "glyph_recog_text": "ROXY", "glyph_recog_ld": 1.0}, {"polygon": [[277, 102], [295, 44], [400, 58], [394, 106]], "text": "SEXY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SEXY", "recog_valid": true, "glyph_recog_text": "SEXY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110483.jpg", "caption": "a yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372635.jpg", "caption": "a toilet with a fish bowl in it", "annotations": [{"polygon": [[128, 116], [128, 138], [166, 146], [196, 139], [193, 118], [170, 115]], "text": "Right", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Right", "recog_valid": true, "glyph_recog_text": "Right", "glyph_recog_ld": 1.0}, {"polygon": [[166, 153], [169, 171], [203, 169], [210, 176], [248, 167], [250, 151], [231, 145], [174, 150]], "text": "acrylic", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "acrylic", "recog_valid": true, "glyph_recog_text": "acrylic", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503715.jpg", "caption": "a brown bear leaning on a log", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110500.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "annotations": [{"polygon": [[204, 301], [204, 264], [311, 260], [313, 283]], "text": "Braves", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Branu", "recog_valid": false, "glyph_recog_text": "Braves", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241595.jpg", "caption": "a blender with a lid on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372673.jpg", "caption": "two zebras standing in a field with a tree in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110540.jpg", "caption": "a large display of vegetables and fruits in a market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241617.jpg", "caption": "a white cake with a bird on top", "annotations": [{"polygon": [[344, 223], [356, 245], [361, 273], [339, 283], [322, 223]], "text": "Jan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tn", "recog_valid": false, "glyph_recog_text": "Jan", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372691.jpg", "caption": "a green and yellow train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503782.jpg", "caption": "a swiss airways airbus a320-214", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372723.jpg", "caption": "a fire hydrant in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241685.jpg", "caption": "a train traveling down the tracks near a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110616.jpg", "caption": "a yellow fire hydrant on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503834.jpg", "caption": "a double decker bus with a large advertisement on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241720.jpg", "caption": "a man and a woman standing next to a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241723.jpg", "caption": "a man in a crown is sitting on the seats of a bus", "annotations": [{"polygon": [[106, 140], [444, 138], [445, 167], [109, 168]], "text": "www.ulli-busreisen", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "n92i912ud-illu.www", "recog_valid": false, "glyph_recog_text": "www.ulli-busreisen", "glyph_recog_ld": 5.555552469171943e-07}, {"polygon": [[215, 345], [303, 331], [303, 358], [225, 371]], "text": "California", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ralioonia", "recog_valid": false, "glyph_recog_text": "California", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[63, 473], [62, 500], [77, 510], [92, 498], [82, 481]], "text": "GT.", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "GT", "recog_valid": false, "glyph_recog_text": "GT.", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372804.jpg", "caption": "two people on a motorbike in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503883.jpg", "caption": "a large clock in a building with statues of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372829.jpg", "caption": "three boys sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241761.jpg", "caption": "a busy city street with people walking and crossing the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503906.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[82, 180], [31, 270], [109, 315], [166, 237]], "text": "18", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "18", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241790.jpg", "caption": "a plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110723.jpg", "caption": "a refrigerator with its doors open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241809.jpg", "caption": "a suitcase with clothes and shoes in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503961.jpg", "caption": "a man in a red outfit is riding a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110790.jpg", "caption": "a person's hand on a laptop keyboard", "annotations": [{"polygon": [[3, 66], [86, 70], [78, 100], [0, 99]], "text": "NSI", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "NS!", "recog_valid": false, "glyph_recog_text": "NSI", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[125, 104], [118, 135], [1, 133], [0, 100], [108, 100]], "text": "BEER", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "EER", "recog_valid": false, "glyph_recog_text": "BEER", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[37, 136], [28, 175], [143, 173], [148, 137]], "text": "NTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NTS", "recog_valid": true, "glyph_recog_text": "NTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372943.jpg", "caption": "a football player in green and black uniform holding a ball", "annotations": [{"polygon": [[259, 249], [320, 250], [313, 349], [250, 342]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OP", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504025.jpg", "caption": "hotel picic, dallas, tx", "annotations": [{"polygon": [[273, 99], [305, 97], [311, 286], [278, 289]], "text": "HOTEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOFWJ", "recog_valid": false, "glyph_recog_text": "工OFW_", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[281, 328], [313, 328], [317, 448], [284, 448]], "text": "PIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "0-U", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504039.jpg", "caption": "a table with food and drinks on it", "annotations": [{"polygon": [[272, 438], [298, 433], [347, 508], [340, 510], [303, 492]], "text": "Orange", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Cromge", "recog_valid": false, "glyph_recog_text": "Orange", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241897.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110829.jpg", "caption": "a clock on the wall above a fireplace in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241904.jpg", "caption": "a man holding a sandwich and french fries", "annotations": [{"polygon": [[98, 17], [96, 36], [126, 53], [125, 36]], "text": "BALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "BALL", "recog_valid": true, "glyph_recog_text": "BALL", "glyph_recog_ld": 1.0}, {"polygon": [[128, 39], [126, 56], [157, 75], [155, 59]], "text": "GAME", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "GAME", "recog_valid": true, "glyph_recog_text": "GAME", "glyph_recog_ld": 1.0}, {"polygon": [[105, 46], [105, 61], [149, 87], [150, 73]], "text": "TODAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "TODAY", "recog_valid": true, "glyph_recog_text": "TODAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241905.jpg", "caption": "two men standing next to a table full of bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110841.jpg", "caption": "a man in a white shirt and cap holding a tennis racket", "annotations": [{"polygon": [[334, 152], [436, 145], [432, 229], [344, 234]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "0U", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241919.jpg", "caption": "a pair of scissors, a ruler and a piece of paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504093.jpg", "caption": "a street sign with a mask on it", "annotations": [{"polygon": [[347, 140], [345, 163], [392, 173], [395, 153]], "text": "LINDQUIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LINDQUIST", "recog_valid": true, "glyph_recog_text": "LNDQUIET", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504099.jpg", "caption": "a white truck driving on a dirt road", "annotations": [{"polygon": [[102, 242], [102, 242], [105, 275], [136, 272], [134, 239]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110886.jpg", "caption": "a sandwich and a can of soda in a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373033.jpg", "caption": "google street view bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241963.jpg", "caption": "a parking meter with a sticker of a man pointing a gun", "annotations": [{"polygon": [[237, 86], [240, 170], [296, 167], [292, 84]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "Q", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110894.jpg", "caption": "a group of people working in a restaurant kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504123.jpg", "caption": "a female skier in red and black is going down the slope", "annotations": [{"polygon": [[295, 217], [305, 229], [322, 241], [338, 250], [335, 258], [340, 257], [340, 260], [331, 258], [314, 251], [304, 244], [295, 233], [291, 228]], "text": "LAYAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LAVA", "recog_valid": false, "glyph_recog_text": "LAYAL", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504130.jpg", "caption": "a car driving down a road with a sign on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110926.jpg", "caption": "a basket of vegetables with a tag on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242001.jpg", "caption": "us navy f-14 tomcat aircraft, uss nc-17, uss nc-17-1, uss nc-17-", "annotations": [{"polygon": [[119, 374], [119, 374], [222, 378], [217, 340], [120, 328]], "text": "USN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "USN", "recog_valid": true, "glyph_recog_text": "USN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504147.jpg", "caption": "two cats sleeping on a pink blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504165.jpg", "caption": "a young boy standing in front of a refrigerator", "annotations": [{"polygon": [[447, 351], [487, 364], [478, 385], [445, 372]], "text": "Family", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "forvily", "recog_valid": false, "glyph_recog_text": "Famil", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373099.jpg", "caption": "a bathroom with a toilet and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242029.jpg", "caption": "a bus with graffiti on it", "annotations": [{"polygon": [[23, 325], [33, 319], [46, 312], [60, 308], [76, 304], [96, 301], [114, 300], [138, 300], [168, 300], [171, 326], [148, 326], [121, 325], [96, 325], [87, 325], [70, 325], [59, 328], [43, 330], [29, 334]], "text": "VVRRRRRR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "wURRRF ", "recog_valid": false, "glyph_recog_text": "VVRRRRRR", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242074.jpg", "caption": "a group of young baseball players sitting on the bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242079.jpg", "caption": "a british airways plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242090.jpg", "caption": "a group of men standing around a fruit stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504271.jpg", "caption": "a remote control sitting on a table in front of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373199.jpg", "caption": "a man and a boy in a kitchen preparing a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242167.jpg", "caption": "a pizza sitting in a box on top of a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504318.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504338.jpg", "caption": "three bow ties on a piece of paper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373267.jpg", "caption": "a train is pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242203.jpg", "caption": "a kitchen with a stove, refrigerator, and microwave", "annotations": [{"polygon": [[462, 115], [462, 85], [501, 85], [502, 115]], "text": "K", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "K", "recog_valid": true, "glyph_recog_text": "K", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242216.jpg", "caption": "a chalk board with a clock and a sign that says congratulations", "annotations": [{"polygon": [[207, 194], [207, 194], [205, 215], [223, 213], [224, 224], [234, 223], [236, 209], [299, 207], [299, 197], [208, 194]], "text": "Congratulate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cngrotudohoe", "recog_valid": false, "glyph_recog_text": "Congratulate", "glyph_recog_ld": 0.5000004166663194}, {"polygon": [[206, 240], [204, 259], [222, 271], [244, 270], [293, 256], [291, 242], [216, 237], [206, 240]], "text": "Engagement", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "kaspgement", "recog_valid": false, "glyph_recog_text": "Engagemen", "glyph_recog_ld": 0.5000004999995}, {"polygon": [[210, 274], [207, 305], [220, 311], [224, 309], [225, 299], [247, 296], [239, 284], [219, 274], [210, 274]], "text": "Sam", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "Sam", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[262, 274], [262, 307], [300, 308], [302, 277], [262, 274]], "text": "Steph", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S4!", "recog_valid": false, "glyph_recog_text": "Steph", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242218.jpg", "caption": "a man holding a remote control in front of a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373316.jpg", "caption": "a person is holding a hot dog", "annotations": [{"polygon": [[87, 132], [60, 199], [64, 210], [95, 142]], "text": "JULY 2007", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "2205620-9", "recog_valid": false, "glyph_recog_text": "SULY2007", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504396.jpg", "caption": "a dog laying on a rug in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242257.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[161, 239], [160, 264], [226, 273], [225, 248]], "text": "CROOKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROOKS", "recog_valid": true, "glyph_recog_text": "CROOKS", "glyph_recog_ld": 1.0}, {"polygon": [[135, 298], [135, 321], [240, 309], [239, 283]], "text": "CORPORATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CORPORATE", "recog_valid": true, "glyph_recog_text": "CORPORATE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373332.jpg", "caption": "a television sitting on a table next to a wardrobe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111189.jpg", "caption": "a tennis player holding a blue umbrella", "annotations": [{"polygon": [[144, 290], [186, 315], [193, 306], [167, 290], [167, 285], [161, 285], [151, 280]], "text": "swisscom", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "swisscom", "recog_valid": true, "glyph_recog_text": "swisacom", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111223.jpg", "caption": "a lufthansa airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242296.jpg", "caption": "a living room with a couch, a table and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111224.jpg", "caption": "a boy in a red shirt and a boy in a green shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242298.jpg", "caption": "a blue angel airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242303.jpg", "caption": "a group of stop signs and a white wall", "annotations": [{"polygon": [[118, 44], [115, 16], [208, 24], [204, 52]], "text": "LUCKY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LUCKY", "recog_valid": true, "glyph_recog_text": "LUCKY", "glyph_recog_ld": 1.0}, {"polygon": [[217, 25], [217, 52], [308, 58], [304, 32]], "text": "TATTOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TATTOO", "recog_valid": true, "glyph_recog_text": "TATTOO", "glyph_recog_ld": 1.0}, {"polygon": [[97, 453], [91, 400], [97, 392], [104, 388], [168, 360], [173, 368], [173, 382], [173, 393], [164, 398], [164, 418], [142, 436], [107, 459], [101, 459]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ST09", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[238, 345], [240, 395], [287, 382], [281, 335]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[188, 376], [190, 424], [237, 399], [234, 358]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373395.jpg", "caption": "a train is parked on the tracks and a cow is standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111277.jpg", "caption": "a soccer player is kicking the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111281.jpg", "caption": "a woman taking a picture of herself in a bathroom mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373425.jpg", "caption": "a red caboose on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504502.jpg", "caption": "a toothbrush holder with a smiley face on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111290.jpg", "caption": "an old black and white photo of a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242376.jpg", "caption": "united airlines a320-200", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373457.jpg", "caption": "a woman standing in an airport with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242399.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111330.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373487.jpg", "caption": "a green truck with a boat on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373511.jpg", "caption": "a blender filled with vegetables and fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111371.jpg", "caption": "a man riding a skateboard in a public area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504606.jpg", "caption": "a woman eating a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242472.jpg", "caption": "a woman sitting on a motorcycle in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373548.jpg", "caption": "a red train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111406.jpg", "caption": "a bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373575.jpg", "caption": "two soccer players are playing on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242506.jpg", "caption": "a train is pulling into a station with people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504654.jpg", "caption": "a man is splashing water on a cow in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373596.jpg", "caption": "a small airplane is parked in the desert with a sign on it", "annotations": [{"polygon": [[370, 159], [364, 178], [369, 181], [500, 134], [413, 133]], "text": "accident", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "aeeioent", "recog_valid": false, "glyph_recog_text": "accident", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[314, 132], [300, 178], [324, 163], [348, 165], [374, 138], [373, 114], [359, 108], [321, 121]], "text": "Safe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Snfe", "recog_valid": false, "glyph_recog_text": "Safe", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[386, 94], [375, 152], [384, 148], [472, 108], [472, 94]], "text": "fly", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "e kiyern", "recog_valid": false, "glyph_recog_text": "fly", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[310, 95], [307, 135], [311, 134], [338, 114], [380, 95]], "text": "Kem", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "tm", "recog_valid": false, "glyph_recog_text": "Kem", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242526.jpg", "caption": "a man riding a bike with a dog on a leash", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242532.jpg", "caption": "a baseball player in green and white uniform pitching a ball", "annotations": [{"polygon": [[462, 117], [460, 210], [430, 210], [432, 119]], "text": "326", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MNO", "recog_valid": false, "glyph_recog_text": "MNO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242535.jpg", "caption": "a woman standing next to a stop sign", "annotations": [{"polygon": [[274, 196], [409, 203], [406, 258], [270, 252]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242539.jpg", "caption": "a baseball team sitting in the dugout with bats and bats", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111472.jpg", "caption": "miniature fruit basket with oranges and bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242547.jpg", "caption": "a red and white subway train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111489.jpg", "caption": "a clock is on a brick walkway in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373637.jpg", "caption": "a black and white photo of a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111500.jpg", "caption": "a street sign with the word seymour on it", "annotations": [{"polygon": [[153, 105], [140, 126], [213, 188], [370, 292], [383, 278]], "text": "Seymour", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Seymour", "recog_valid": true, "glyph_recog_text": "S e y m o u r", "glyph_recog_ld": 0.5384618934908512}, {"polygon": [[406, 305], [449, 337], [449, 346], [445, 350], [404, 317], [401, 307]], "text": "1100", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "1100", "recog_valid": true, "glyph_recog_text": "1100", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373653.jpg", "caption": "a red bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242582.jpg", "caption": "a man standing in front of a produce stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111525.jpg", "caption": "a cart full of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504748.jpg", "caption": "two teddy bears sitting in a box with a heart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373683.jpg", "caption": "a group of people skiing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111543.jpg", "caption": "a woman in a plaid skirt is reaching into a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373700.jpg", "caption": "a purple motorcycle parked next to a white motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504807.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242664.jpg", "caption": "a herd of elephants walking in a field at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242663.jpg", "caption": "a street sign and a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373737.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373747.jpg", "caption": "a pizza and a beer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111604.jpg", "caption": "a bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373758.jpg", "caption": "a cow standing on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504833.jpg", "caption": "a lufthansa airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504850.jpg", "caption": "a man walking down a road holding a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242717.jpg", "caption": "a herd of zebras drinking water from a pond", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504862.jpg", "caption": "a woman sitting at a table with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242823.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111768.jpg", "caption": "a bench painted with two men sitting on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111772.jpg", "caption": "a bowl of apples on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504988.jpg", "caption": "a soldier serving food to others in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504999.jpg", "caption": "a black and white photo of a street sign", "annotations": [{"polygon": [[269, 227], [447, 154], [448, 211], [267, 281]], "text": "Pennsylvania", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pennsyivania", "recog_valid": false, "glyph_recog_text": "Pennsylvania", "glyph_recog_ld": 0.9166667361110532}, {"polygon": [[486, 168], [486, 141], [456, 153], [456, 179]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}, {"polygon": [[309, 306], [381, 280], [380, 245], [309, 274]], "text": "1300", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1300", "recog_valid": true, "glyph_recog_text": "1300", "glyph_recog_ld": 1.0}, {"polygon": [[43, 294], [39, 344], [105, 365], [108, 316]], "text": "13 1/2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "13", "recog_valid": false, "glyph_recog_text": "13 1/2", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[138, 386], [135, 417], [73, 399], [77, 367]], "text": "300", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "300", "recog_valid": true, "glyph_recog_text": "300", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505003.jpg", "caption": "a street sign with the words 12th street and 12th street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242867.jpg", "caption": "a street sign with a hand and a sign has texts", "annotations": [{"polygon": [[165, 207], [163, 181], [288, 31], [295, 53]], "text": "PRESIDENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PRESIDENT", "recog_valid": true, "glyph_recog_text": "PRESIDENT", "glyph_recog_ld": 1.0}, {"polygon": [[216, 163], [215, 154], [263, 101], [267, 109]], "text": "JUDGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JUDGE", "recog_valid": true, "glyph_recog_text": "JVHES", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[161, 236], [160, 225], [216, 166], [218, 176]], "text": "CHARLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHARLES", "recog_valid": true, "glyph_recog_text": "安线中rh35", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[191, 142], [192, 145], [221, 111], [218, 109]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "3的机", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[246, 132], [248, 141], [332, 47], [324, 43]], "text": "BECKINELLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BECKINELLA", "recog_valid": true, "glyph_recog_text": "nMcniNai&s", "glyph_recog_ld": 0.1000008999991}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111807.jpg", "caption": "a computer monitor with two monitors and a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505028.jpg", "caption": "a woman is throwing a frisbee", "annotations": [{"polygon": [[178, 233], [171, 254], [138, 249], [132, 222], [176, 230]], "text": "IXIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IXIES", "recog_valid": true, "glyph_recog_text": "IXIES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242915.jpg", "caption": "a horse is standing in front of a door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111845.jpg", "caption": "two baseball players sliding into base during a game", "annotations": [{"polygon": [[401, 311], [398, 325], [392, 333], [370, 313], [350, 302], [323, 297], [337, 277], [376, 292]], "text": "FIGHTERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIGHTERS", "recog_valid": true, "glyph_recog_text": "FIGHTERS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242926.jpg", "caption": "a poster advertising the william jenny scott collection", "annotations": [{"polygon": [[253, 315], [254, 328], [270, 327], [290, 321], [310, 312], [325, 301], [335, 289], [347, 274], [355, 258], [359, 248], [348, 242], [345, 250], [343, 255], [337, 266], [331, 275], [323, 285], [318, 290], [304, 300], [294, 307], [275, 312]], "text": "COLLECTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOLLECTION", "recog_valid": false, "glyph_recog_text": "COLLECTION", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[155, 245], [162, 262], [170, 277], [177, 290], [193, 305], [210, 316], [240, 326], [242, 314], [232, 312], [218, 306], [198, 292], [181, 272], [176, 264], [166, 241]], "text": "ICELANDIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ICELANDIC", "recog_valid": true, "glyph_recog_text": "ICELANDIC", "glyph_recog_ld": 1.0}, {"polygon": [[314, 78], [306, 89], [322, 101], [329, 109], [337, 121], [344, 135], [349, 148], [352, 157], [365, 155], [360, 142], [357, 130], [351, 120], [344, 108], [336, 97], [325, 88]], "text": "LIBRARY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LIBRARY", "recog_valid": true, "glyph_recog_text": "LIBRARY", "glyph_recog_ld": 1.0}, {"polygon": [[149, 155], [161, 160], [166, 146], [172, 130], [181, 116], [192, 104], [201, 95], [211, 86], [204, 76], [195, 81], [185, 91], [172, 105], [160, 126], [155, 137]], "text": "HARVARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HARVARD", "recog_valid": true, "glyph_recog_text": "HARVARD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374001.jpg", "caption": "a traffic light that is made out of a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374005.jpg", "caption": "holy oly revival the ocho", "annotations": [{"polygon": [[165, 95], [165, 160], [338, 160], [336, 137], [352, 97], [167, 96]], "text": "HOLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HOLY", "recog_valid": true, "glyph_recog_text": "HOLY", "glyph_recog_ld": 1.0}, {"polygon": [[373, 99], [372, 158], [373, 161], [489, 160], [489, 133], [504, 95], [436, 96], [392, 96], [376, 99]], "text": "OLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "OEY", "recog_valid": false, "glyph_recog_text": "OLY", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[172, 166], [174, 231], [493, 230], [480, 167], [175, 167]], "text": "REVIVAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REVINAL", "recog_valid": false, "glyph_recog_text": "REVIVAL", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[209, 246], [218, 289], [275, 284], [275, 265], [256, 246], [212, 245]], "text": "The", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tbe", "recog_valid": false, "glyph_recog_text": "The", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[308, 244], [289, 282], [306, 288], [405, 287], [412, 282], [412, 269], [383, 244], [310, 246]], "text": "Ocha!", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Debo", "recog_valid": false, "glyph_recog_text": "Ocha!", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[318, 426], [328, 416], [347, 410], [371, 405], [501, 371], [502, 373], [490, 427]], "text": "NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "WOW", "recog_valid": false, "glyph_recog_text": "NOW", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374028.jpg", "caption": "a plate with meat, vegetables and a drink", "annotations": [{"polygon": [[156, 86], [158, 82], [166, 75], [180, 76], [190, 82], [197, 87], [200, 114], [199, 118], [195, 121], [185, 122], [176, 121], [168, 119], [164, 117], [162, 113]], "text": "NY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Y", "recog_valid": false, "glyph_recog_text": "y", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505104.jpg", "caption": "a busy city street with many people walking and driving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242964.jpg", "caption": "a crowd of people with a large inflatable whale", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505126.jpg", "caption": "a fire hydrant on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374084.jpg", "caption": "a man and a woman are playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111943.jpg", "caption": "a man and woman sitting at a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243018.jpg", "caption": "a street sign with a one way sign on it", "annotations": [{"polygon": [[61, 199], [60, 237], [87, 238], [98, 199]], "text": "17", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "17", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[110, 199], [108, 240], [150, 241], [157, 202]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374114.jpg", "caption": "a man is standing in the door of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505188.jpg", "caption": "a living room with a couch, a coffee table and a painting", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111972.jpg", "caption": "a woman playing tennis on a purple court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111992.jpg", "caption": "a couple of carts with luggage on them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111998.jpg", "caption": "a group of people standing in front of a bus", "annotations": [{"polygon": [[410, 126], [412, 159], [412, 164], [448, 152], [449, 111]], "text": "t", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "七", "recog_valid": false, "glyph_recog_text": "t", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[197, 200], [195, 249], [293, 253], [288, 201]], "text": "sta", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "sto", "recog_valid": false, "glyph_recog_text": "sta", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505226.jpg", "caption": "a cell phone with a lanyard attached to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374171.jpg", "caption": "a stop sign with a tomato on it", "annotations": [{"polygon": [[66, 95], [66, 95], [63, 137], [191, 151], [195, 111], [66, 92]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[82, 368], [141, 428], [149, 419], [89, 358]], "text": "PEDESTRIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "PEDESTRIAN", "recog_valid": true, "glyph_recog_text": "PEDESTRIAN", "glyph_recog_ld": 1.0}, {"polygon": [[146, 409], [153, 416], [197, 363], [190, 355]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "CROBSING", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[453, 216], [449, 237], [449, 237], [508, 229], [509, 207]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOF", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505244.jpg", "caption": "a boat is sitting in the grass", "annotations": [{"polygon": [[146, 261], [190, 263], [221, 266], [267, 269], [266, 287], [230, 288], [182, 292], [148, 278]], "text": "Kinyfisher", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hingfioher", "recog_valid": false, "glyph_recog_text": "Kinyfisher", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243173.jpg", "caption": "a refrigerator with a lot of food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374248.jpg", "caption": "a couple of people sitting on the beach under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243176.jpg", "caption": "a woman is holding a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112106.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[150, 184], [164, 130], [265, 130], [386, 132], [399, 153], [402, 214], [303, 212], [233, 209], [149, 212]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112124.jpg", "caption": "a jockey is riding a horse in a park", "annotations": [{"polygon": [[180, 338], [198, 294], [212, 292], [198, 340]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2D", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243207.jpg", "caption": "a large airplane with luggage on the tarmac", "annotations": [{"polygon": [[88, 203], [88, 203], [160, 215], [157, 197], [86, 175]], "text": "Kenya", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Kenxa", "recog_valid": false, "glyph_recog_text": "Kenya", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374302.jpg", "caption": "a group of women standing on a sidewalk near a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243251.jpg", "caption": "a street sign is shown at night in a city", "annotations": [{"polygon": [[203, 184], [244, 210], [247, 204], [206, 177], [203, 180]], "text": "Gloucester", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gloucester", "recog_valid": true, "glyph_recog_text": "diyiwue", "glyph_recog_ld": 0.1000008999991}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374325.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505421.jpg", "caption": "a group of people standing around a teddy bear", "annotations": [{"polygon": [[150, 198], [176, 203], [176, 233], [142, 229], [136, 214], [139, 198]], "text": "CA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CEA", "recog_valid": false, "glyph_recog_text": "CA", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112207.jpg", "caption": "a no bicycle parking sign on a white background", "annotations": [{"polygon": [[288, 217], [287, 260], [232, 265], [235, 221]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[350, 342], [350, 392], [164, 397], [167, 351]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[346, 277], [347, 323], [171, 332], [174, 285]], "text": "BICYCLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BICYCLE", "recog_valid": true, "glyph_recog_text": "BICYCLE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374357.jpg", "caption": "a stop sign and a garbage truck on a city street", "annotations": [{"polygon": [[369, 155], [485, 156], [484, 200], [369, 201]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374361.jpg", "caption": "a woman walking down the street with her bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374360.jpg", "caption": "a large warehouse with red buses parked inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243296.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374374.jpg", "caption": "a man and a woman riding skateboards down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374405.jpg", "caption": "a man throwing a frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505479.jpg", "caption": "a woman in a riding outfit is standing next to a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112272.jpg", "caption": "a baseball player is standing at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505489.jpg", "caption": "a person walking in front of a fountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505493.jpg", "caption": "a bed with a stuffed animal, a box of candy, and a toothbrush", "annotations": [{"polygon": [[188, 334], [297, 262], [327, 281], [221, 358]], "text": "HERSHEY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HERSHEY.S", "recog_valid": false, "glyph_recog_text": "HERSHEY'S", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[271, 323], [313, 292], [320, 300], [278, 331]], "text": "Chocolate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Chocolate", "recog_valid": true, "glyph_recog_text": "Shhooewee", "glyph_recog_ld": 0.333334074073251}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505501.jpg", "caption": "a man and a woman in a cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374431.jpg", "caption": "two people are shearing a sheep on a pavement", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374463.jpg", "caption": "a stuffed bear sitting on a counter", "annotations": [{"polygon": [[232, 364], [296, 365], [297, 415], [234, 414]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "13", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112320.jpg", "caption": "a drawing of a refrigerator", "annotations": [{"polygon": [[338, 420], [338, 442], [381, 452], [383, 438], [376, 427]], "text": "inside", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "tns:de", "recog_valid": false, "glyph_recog_text": "inside", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112327.jpg", "caption": "a box of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374502.jpg", "caption": "a small plane sitting on the ground with grass and trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112363.jpg", "caption": "a narrow boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505583.jpg", "caption": "a street with a sign that says bristol water tower", "annotations": [{"polygon": [[41, 153], [41, 153], [51, 147], [65, 140], [77, 136], [92, 131], [116, 130], [133, 132], [142, 135], [161, 143], [177, 152], [166, 165], [175, 173], [173, 177], [160, 166], [143, 158], [129, 153], [113, 153], [94, 154], [72, 161], [53, 173]], "text": "BRISTOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BRISTOL", "recog_valid": true, "glyph_recog_text": "BRISTOL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243443.jpg", "caption": "two motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374520.jpg", "caption": "an austrian airways airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505611.jpg", "caption": "a crowded food market with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243500.jpg", "caption": "a dog carrying a frisbee in its mouth", "annotations": [{"polygon": [[278, 294], [239, 317], [248, 330], [287, 307]], "text": "KONO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONET", "recog_valid": false, "glyph_recog_text": "KONO", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243509.jpg", "caption": "a baseball player is holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243518.jpg", "caption": "a man standing in front of a large airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505666.jpg", "caption": "a white fire hydrant on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112459.jpg", "caption": "a table with a lot of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374661.jpg", "caption": "a group of people gathered around a table", "annotations": [{"polygon": [[107, 413], [124, 407], [136, 425], [147, 442], [148, 448], [130, 448], [126, 446]], "text": "RADGA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "200", "recog_valid": false, "glyph_recog_text": "RADGA", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505738.jpg", "caption": "a group of people on motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243630.jpg", "caption": "a laptop computer sitting on a desk", "annotations": [{"polygon": [[290, 246], [326, 245], [326, 216], [290, 216]], "text": "707", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LOL", "recog_valid": false, "glyph_recog_text": "707", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243650.jpg", "caption": "three people riding skateboards down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112584.jpg", "caption": "a person is opening a refrigerator door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243660.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112604.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243717.jpg", "caption": "a blue double decker bus parked in a parking lot", "annotations": [{"polygon": [[174, 333], [174, 334], [174, 348], [254, 365], [255, 348]], "text": "megabus.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "megabus.com", "recog_valid": true, "glyph_recog_text": "megabus.com", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374796.jpg", "caption": "a clock on the side of a building", "annotations": [{"polygon": [[164, 152], [174, 164], [181, 159], [194, 151], [212, 144], [221, 141], [217, 128], [204, 131], [194, 136], [187, 139], [169, 146], [165, 150]], "text": "BOODLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BOODLE", "recog_valid": true, "glyph_recog_text": "BOODLE", "glyph_recog_ld": 1.0}, {"polygon": [[238, 124], [239, 140], [247, 140], [253, 141], [268, 143], [283, 148], [300, 156], [316, 170], [324, 158], [309, 148], [293, 138], [278, 133], [256, 128], [246, 124], [240, 124]], "text": "DUNTHORNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DUNTHORNE", "recog_valid": true, "glyph_recog_text": "DUNTHORNE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243725.jpg", "caption": "a group of people playing frisbee on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374818.jpg", "caption": "three large passenger planes parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505898.jpg", "caption": "a woman is playing with two dogs on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374832.jpg", "caption": "a dog sitting on the floor in front of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374845.jpg", "caption": "a kitchen with a sink and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112703.jpg", "caption": "two double decker buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505924.jpg", "caption": "a girl in blue and yellow soccer uniform kicking the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243782.jpg", "caption": "a woman in a green skirt and white shirt is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112720.jpg", "caption": "a group of children playing baseball in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112726.jpg", "caption": "a white car with a clock on the windshield", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505963.jpg", "caption": "a woman holding a pink umbrella in a parade", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374891.jpg", "caption": "the street sign for oasis dr and wonderland dr", "annotations": [{"polygon": [[221, 201], [304, 227], [303, 190], [223, 163]], "text": "OASIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OASIS", "recog_valid": true, "glyph_recog_text": "OASIS", "glyph_recog_ld": 1.0}, {"polygon": [[185, 311], [185, 313], [359, 249], [356, 214], [183, 281]], "text": "WONDERLAND 4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WONDERLAND", "recog_valid": false, "glyph_recog_text": "WONDERLAND 4", "glyph_recog_ld": 0.8333334722221064}, {"polygon": [[368, 226], [368, 246], [416, 229], [414, 208]], "text": "4300", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "4300", "recog_valid": true, "glyph_recog_text": "4300", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243823.jpg", "caption": "a clock, a clock face, and a clock face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243824.jpg", "caption": "a man and a woman sitting at a table with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112757.jpg", "caption": "a black and white photo of a truck parked on a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374904.jpg", "caption": "a cow standing in the middle of a street", "annotations": [{"polygon": [[0, 81], [62, 87], [62, 118], [-1, 112]], "text": "HOUSE", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "HOUSE", "recog_valid": true, "glyph_recog_text": "HOUSE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112785.jpg", "caption": "a teddy bear sitting on a bed with a bunch of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506030.jpg", "caption": "a man and woman sitting at a table with a cake", "annotations": [{"polygon": [[25, 90], [40, 87], [54, 95], [56, 122], [47, 140], [47, 140], [40, 144], [29, 139], [20, 128], [19, 102]], "text": "0", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506034.jpg", "caption": "a view of a street from a car's rear view mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506045.jpg", "caption": "a refrigerator with a door open and a full fridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243910.jpg", "caption": "ingredients for banana bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112849.jpg", "caption": "a yellow truck is parked in a showroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374994.jpg", "caption": "a table with various items including a knife, scissors, toothbrush, and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506130.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112922.jpg", "caption": "a soccer player in black and red standing on a field", "annotations": [{"polygon": [[109, 238], [109, 238], [247, 240], [248, 283], [180, 282], [143, 280], [115, 281], [89, 279], [91, 239], [98, 237]], "text": "SOCCER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOCCER", "recog_valid": true, "glyph_recog_text": "SOCCER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506140.jpg", "caption": "a person holding a wii remote in their hand", "annotations": [{"polygon": [[309, 292], [345, 276], [364, 285], [321, 313]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wii", "recog_valid": true, "glyph_recog_text": "Wii", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244014.jpg", "caption": "a bathroom with a sink and mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375096.jpg", "caption": "a warning sign on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506183.jpg", "caption": "a fighter jet on the runway with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244096.jpg", "caption": "a red and white fire hydrant in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506256.jpg", "caption": "a person holding a piece of fruit in front of a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375192.jpg", "caption": "a video game scene with a man on a ladder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506270.jpg", "caption": "a keyboard and mouse on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375208.jpg", "caption": "a toothbrush, toothpaste, and a toothbrush holder", "annotations": [{"polygon": [[126, 225], [300, 212], [297, 254], [117, 270]], "text": "AquaFresh", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aqugfresh", "recog_valid": false, "glyph_recog_text": "AquaFresh", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244141.jpg", "caption": "a cell phone and a tablet are in a boxing ring", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244157.jpg", "caption": "a person skiing down a slope with a red flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375282.jpg", "caption": "a baseball player is holding a bat ready to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113142.jpg", "caption": "a woman is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113147.jpg", "caption": "a man wearing a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506372.jpg", "caption": "a school bus and a truck collided at a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244231.jpg", "caption": "three birds perched on a power line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506379.jpg", "caption": "a parking meter is shown on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375311.jpg", "caption": "a bus with a poster for the movie on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506394.jpg", "caption": "a stop sign with stickers on it", "annotations": [{"polygon": [[153, 117], [184, 115], [232, 113], [285, 114], [331, 116], [349, 123], [355, 141], [354, 152], [336, 200], [269, 197], [225, 198], [161, 201], [150, 189], [150, 179]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506398.jpg", "caption": "a group of people sitting around a skateboard ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375331.jpg", "caption": "two women preparing food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506411.jpg", "caption": "computer eye stress how to avoid it and how to fix it by dr antony hutchinson", "annotations": [{"polygon": [[135, 148], [137, 195], [329, 174], [313, 131]], "text": "COMPUTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CUMPUTER", "recog_valid": false, "glyph_recog_text": "COMPUTER", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[136, 198], [142, 253], [207, 246], [199, 192]], "text": "EYE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FYE", "recog_valid": false, "glyph_recog_text": "EYE", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[206, 191], [217, 245], [349, 225], [330, 176]], "text": "STRESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STHESS", "recog_valid": false, "glyph_recog_text": "STRESS", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[184, 366], [187, 387], [358, 356], [356, 338]], "text": "Dr.R.ANTHONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dr.R.ANTHONY", "recog_valid": true, "glyph_recog_text": "Dr.R.ANTHONY", "glyph_recog_ld": 1.0}, {"polygon": [[195, 388], [198, 406], [358, 378], [354, 358]], "text": "HUTCHINSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HUTCHINSON", "recog_valid": true, "glyph_recog_text": "HUTCHINSON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506417.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[257, 198], [272, 210], [262, 238], [248, 234], [248, 234], [238, 221]], "text": "41", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": false, "glyph_recog_text": "子", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375347.jpg", "caption": "a tray with a pizza, a basket of french fries and a beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113212.jpg", "caption": "a woman's legs are on a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506429.jpg", "caption": "a green parrot on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113276.jpg", "caption": "a young girl looking at a refrigerator in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375425.jpg", "caption": "a kitchen with a sink, stove and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244365.jpg", "caption": "a horse pulling a carriage down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375437.jpg", "caption": "a black mouse and a book on a desk", "annotations": [{"polygon": [[319, 204], [381, 226], [377, 242], [372, 243], [319, 219]], "text": "WINES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINES", "recog_valid": true, "glyph_recog_text": "WINES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113310.jpg", "caption": "air france a380-800 at paris-orly airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375455.jpg", "caption": "a large airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113315.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113317.jpg", "caption": "a man in blue shirt and red shoes is about to hit a tennis ball", "annotations": [{"polygon": [[-1, 76], [166, 74], [145, 143], [-1, 145]], "text": "YS", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "YS", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[211, 142], [274, 72], [513, 70], [512, 137]], "text": "ATP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ATP", "recog_valid": true, "glyph_recog_text": "ATP", "glyph_recog_ld": 1.0}, {"polygon": [[257, 175], [512, 172], [512, 236], [253, 240]], "text": "BARCL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BARCL", "recog_valid": true, "glyph_recog_text": "BARCL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506550.jpg", "caption": "a baseball player in a red and white uniform", "annotations": [{"polygon": [[224, 236], [267, 206], [275, 251], [225, 279]], "text": "TEO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "T1O", "recog_valid": false, "glyph_recog_text": "TEO", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[282, 207], [274, 255], [291, 269], [319, 223]], "text": "NS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "SN", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113338.jpg", "caption": "a clock is in the window of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506555.jpg", "caption": "children at a birthday party", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244416.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244423.jpg", "caption": "a person holding a cell phone with windows 8 on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506599.jpg", "caption": "two people on a ski lift", "annotations": [{"polygon": [[250, 312], [283, 269], [292, 271], [264, 315]], "text": "burton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Buton", "recog_valid": false, "glyph_recog_text": "betian", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375528.jpg", "caption": "a man standing in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244484.jpg", "caption": "a woman standing next to a microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244520.jpg", "caption": "a woman holding a baseball bat in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375645.jpg", "caption": "a lunch box with a hello kitty theme", "annotations": [{"polygon": [[37, 65], [42, 78], [92, 60], [88, 48]], "text": "HELLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "HELLO", "recog_valid": true, "glyph_recog_text": "HELLO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244578.jpg", "caption": "a man in a wet suit carrying a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375737.jpg", "caption": "a church with a clock tower and a steeple", "annotations": [{"polygon": [[310, 407], [368, 396], [382, 407], [383, 420], [312, 430]], "text": "Luke's", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Luke's", "recog_valid": true, "glyph_recog_text": "Luke's", "glyph_recog_ld": 1.0}, {"polygon": [[281, 440], [357, 431], [360, 458], [281, 467]], "text": "Church", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Church", "recog_valid": true, "glyph_recog_text": "Church", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375741.jpg", "caption": "bananas with chocolate spots", "annotations": [{"polygon": [[66, 154], [66, 154], [63, 177], [106, 167], [105, 143]], "text": "silver", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "silver", "recog_valid": true, "glyph_recog_text": "silver", "glyph_recog_ld": 1.0}, {"polygon": [[61, 179], [64, 201], [64, 204], [108, 184], [107, 166]], "text": "spoon", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "spoor", "recog_valid": false, "glyph_recog_text": "spoor", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375750.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113617.jpg", "caption": "a man is playing tennis", "annotations": [{"polygon": [[228, 63], [221, 87], [309, 103], [315, 75]], "text": "Money", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Money", "recog_valid": true, "glyph_recog_text": "Money", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506843.jpg", "caption": "a large military plane on the runway with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244720.jpg", "caption": "two dogs standing on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244721.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375814.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375820.jpg", "caption": "two laptops are on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375826.jpg", "caption": "toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto toronto", "annotations": [{"polygon": [[455, 124], [456, 140], [490, 116], [487, 101], [473, 112]], "text": "Postes", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Postes", "recog_valid": true, "glyph_recog_text": "Postec", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375838.jpg", "caption": "a baseball player is holding a bat and ready to hit the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375841.jpg", "caption": "two horses grazing in a field with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506919.jpg", "caption": "a plane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244795.jpg", "caption": "a street with a clock tower and people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375869.jpg", "caption": "a cow with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375882.jpg", "caption": "a woman is standing in front of a fruit stand", "annotations": [{"polygon": [[137, 295], [145, 315], [168, 297], [163, 282]], "text": "45", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "45", "recog_valid": true, "glyph_recog_text": "45", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506955.jpg", "caption": "a cat is sitting on a table", "annotations": [{"polygon": [[280, 335], [285, 321], [301, 305], [333, 288], [363, 282], [360, 268], [332, 272], [302, 287], [280, 298], [263, 321]], "text": "Shakespeare's Pizza", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "gheskenpenre", "recog_valid": false, "glyph_recog_text": "Shakespeae'ts Piuza", "glyph_recog_ld": 0.2105267313017204}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244816.jpg", "caption": "a bathroom with a toilet and a sink", "annotations": [{"polygon": [[222, 221], [231, 210], [259, 238], [251, 247]], "text": "READ MORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "READ", "recog_valid": false, "glyph_recog_text": "池3", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[214, 231], [221, 222], [248, 247], [241, 256]], "text": "READHORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HORe", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506988.jpg", "caption": "a little girl sitting on a counter eating a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375939.jpg", "caption": "black and white photo of birds on beach with waves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244870.jpg", "caption": "a yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375947.jpg", "caption": "a man standing in an airport with a cart full of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113806.jpg", "caption": "a train is pulling into a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113812.jpg", "caption": "a blue truck with a white trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244885.jpg", "caption": "a street sign and a one way sign on a pole", "annotations": [{"polygon": [[167, 128], [167, 161], [247, 187], [248, 156]], "text": "Fairlie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fairlie", "recog_valid": true, "glyph_recog_text": "Fairlie", "glyph_recog_ld": 1.0}, {"polygon": [[211, 220], [258, 197], [259, 223], [212, 248]], "text": "Luckie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Luckie", "recog_valid": true, "glyph_recog_text": "Luckie", "glyph_recog_ld": 1.0}, {"polygon": [[288, 328], [294, 372], [348, 389], [357, 349]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[292, 274], [296, 312], [355, 334], [351, 298]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507028.jpg", "caption": "a candy shoppe sign is on the ground", "annotations": [{"polygon": [[295, 247], [443, 241], [464, 191], [295, 198], [288, 226]], "text": "CANDY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANDY", "recog_valid": true, "glyph_recog_text": "CANDY", "glyph_recog_ld": 1.0}, {"polygon": [[298, 323], [460, 329], [462, 260], [292, 266]], "text": "SHOPPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHOPPE", "recog_valid": true, "glyph_recog_text": "SHOPPE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375992.jpg", "caption": "a bathroom with a toilet, sink and bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376009.jpg", "caption": "a woman sits in front of a poster with a group of children", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507091.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113880.jpg", "caption": "two dogs are sitting on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244965.jpg", "caption": "a cat sitting on top of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507113.jpg", "caption": "a street with a no parking sign and a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113898.jpg", "caption": "a pizza and a bunch of other food in a box", "annotations": [{"polygon": [[220, 341], [221, 353], [308, 319], [307, 307]], "text": "GREENBOX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GREENBOA", "recog_valid": false, "glyph_recog_text": "GREENBOX", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244975.jpg", "caption": "a man holding a cell phone in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507136.jpg", "caption": "a collage of pictures of a bride and groom getting ready for their wedding", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507157.jpg", "caption": "a man in a field with a kite in the sky", "annotations": [{"polygon": [[25, 38], [163, 43], [165, 68], [24, 68]], "text": "PETEP", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "PETER", "recog_valid": false, "glyph_recog_text": "PETEP", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113970.jpg", "caption": "a group of young children playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376114.jpg", "caption": "an elephant eating leaves from a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113979.jpg", "caption": "a man and a child standing on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376128.jpg", "caption": "british airways boeing 747-400", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245062.jpg", "caption": "a baseball player in a grey uniform", "annotations": [{"polygon": [[201, 195], [233, 192], [250, 211], [244, 217], [225, 222], [215, 214], [205, 204]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9", "recog_valid": false, "glyph_recog_text": "D", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507237.jpg", "caption": "a woman is riding a horse in front of a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114024.jpg", "caption": "a group of people walking in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245105.jpg", "caption": "a man is feeding an elephant a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114035.jpg", "caption": "a woman in a white shirt and red pants standing in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507257.jpg", "caption": "a street sign in front of a cemetery", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507263.jpg", "caption": "a train traveling down a track in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376196.jpg", "caption": "a bus parked near a pool with a fountain in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507276.jpg", "caption": "a small yellow airplane flying in the sky", "annotations": [{"polygon": [[287, 333], [309, 331], [336, 384], [333, 387], [314, 388], [291, 342]], "text": "MHPH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MHPH", "recog_valid": true, "glyph_recog_text": "MHPH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245139.jpg", "caption": "a large passenger jet flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114068.jpg", "caption": "a stop sign with a car parked next to it", "annotations": [{"polygon": [[233, 253], [308, 254], [311, 293], [239, 298], [233, 253]], "text": "WHOA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WHOA", "recog_valid": true, "glyph_recog_text": "WHOA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245142.jpg", "caption": "a beach with a lot of people flying kites", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114083.jpg", "caption": "a person holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114101.jpg", "caption": "a large truck pulling a bus down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507342.jpg", "caption": "a man standing next to a car on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376283.jpg", "caption": "a toy vehicle with yellow and black tires", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114140.jpg", "caption": "a city street at night with neon signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114149.jpg", "caption": "a group of boats docked in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114174.jpg", "caption": "a virtual reality scene with a table full of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245265.jpg", "caption": "a city street with a lot of buildings and cars", "annotations": [{"polygon": [[30, 65], [26, 126], [25, 136], [48, 139], [55, 84], [55, 67], [31, 65]], "text": "ENJI", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "2ZH", "recog_valid": false, "glyph_recog_text": "Wz-", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114204.jpg", "caption": "two double decker buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114222.jpg", "caption": "a woman cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114225.jpg", "caption": "imam hamdi slimi, the imam of the mosque in montreal, was arrested in toronto on saturday", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114229.jpg", "caption": "two women wearing wigs and sunglasses sitting in the stands", "annotations": [{"polygon": [[326, 359], [413, 360], [445, 357], [445, 326], [412, 322], [388, 326], [366, 319], [332, 309], [326, 343]], "text": "Astros", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "lstros", "recog_valid": false, "glyph_recog_text": "Astros", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[82, 298], [75, 342], [200, 347], [205, 302], [82, 292]], "text": "Astros", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hstncs", "recog_valid": false, "glyph_recog_text": "Astros", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376385.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "annotations": [{"polygon": [[2, 189], [0, 224], [115, 220], [114, 201], [99, 200], [95, 189], [89, 192], [91, 200], [17, 203], [12, 190], [3, 190]], "text": "Canada", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Canada", "recog_valid": true, "glyph_recog_text": "Canada", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245314.jpg", "caption": "a truck is parked in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507459.jpg", "caption": "a woman sitting on a chair in front of a computer", "annotations": [{"polygon": [[68, 459], [443, 464], [443, 501], [71, 496]], "text": "meissaadretcom", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "meisecadretciom", "recog_valid": false, "glyph_recog_text": "meissaadretcom", "glyph_recog_ld": 0.8000001333332445}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507460.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[219, 182], [218, 145], [255, 147], [262, 156], [259, 164], [256, 176], [255, 180], [255, 186]], "text": "po", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PO", "recog_valid": false, "glyph_recog_text": "po", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245326.jpg", "caption": "three jockeys race horses down the track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245351.jpg", "caption": "a man riding a horse through a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114286.jpg", "caption": "a group of young children playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507535.jpg", "caption": "two motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114326.jpg", "caption": "a child with a pirate shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376479.jpg", "caption": "a group of people standing around a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245409.jpg", "caption": "a baseball player in a grey uniform throwing a ball", "annotations": [{"polygon": [[219, 95], [358, 94], [357, 135], [224, 140]], "text": "Wawa", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Wawa", "recog_valid": true, "glyph_recog_text": "Wawa", "glyph_recog_ld": 1.0}, {"polygon": [[156, 227], [160, 237], [172, 227], [186, 221], [205, 212], [216, 208], [216, 199], [201, 199], [185, 201], [174, 204], [163, 211], [158, 221]], "text": "HAGERSTOWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "scaRSTOwe", "recog_valid": false, "glyph_recog_text": "AHAGERSTOAWN", "glyph_recog_ld": 0.33333388888842586}, {"polygon": [[223, 201], [205, 217], [201, 221], [209, 232], [234, 213]], "text": "47", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": false, "glyph_recog_text": "47", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[218, 0], [217, 60], [87, 60], [86, -1]], "text": "ring", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ring", "recog_valid": true, "glyph_recog_text": "ring", "glyph_recog_ld": 1.0}, {"polygon": [[357, 0], [245, 0], [246, 41], [357, 38]], "text": "the", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "tne", "recog_valid": false, "glyph_recog_text": "the", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114340.jpg", "caption": "a table with two boxes of donuts and a bowl of milk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376492.jpg", "caption": "a large white truck is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245425.jpg", "caption": "a baseball player running to first base", "annotations": [{"polygon": [[206, 200], [245, 187], [250, 207], [211, 221]], "text": "Ad", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AM", "recog_valid": false, "glyph_recog_text": "A d", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114353.jpg", "caption": "a man and a young boy riding a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376502.jpg", "caption": "two men eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114363.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[139, 54], [161, 69], [176, 79], [178, 84], [177, 96], [134, 68]], "text": "Saint-", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Saint", "recog_valid": false, "glyph_recog_text": "Saint-", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[184, 82], [222, 107], [238, 122], [238, 133], [183, 98]], "text": "Mathieu", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Mathieu", "recog_valid": true, "glyph_recog_text": "Maihteu", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[320, 137], [369, 120], [373, 137], [323, 155]], "text": "BAILE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAILE", "recog_valid": true, "glyph_recog_text": "BAILE", "glyph_recog_ld": 1.0}, {"polygon": [[275, 273], [327, 293], [328, 304], [275, 286]], "text": "LIVRAISAON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LINRAISON", "recog_valid": false, "glyph_recog_text": "VRASAON", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245441.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245451.jpg", "caption": "a computer monitor and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376523.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[297, 244], [310, 247], [338, 263], [361, 277], [393, 304], [386, 313], [372, 323], [337, 295], [309, 281], [276, 266]], "text": "ASTUDILLO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHULL", "recog_valid": false, "glyph_recog_text": "ASTUDILLO", "glyph_recog_ld": 0.44444506172770915}, {"polygon": [[336, 298], [351, 311], [309, 351], [292, 342], [252, 312], [292, 274]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "80", "recog_valid": false, "glyph_recog_text": "30", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114403.jpg", "caption": "a man in a red suit skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376558.jpg", "caption": "a man sitting on a bench next to a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114468.jpg", "caption": "a food processor with a white substance in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114474.jpg", "caption": "a woman standing on a tennis court with a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245551.jpg", "caption": "a yellow bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376623.jpg", "caption": "a subway sign on a building with a car driving by", "annotations": [{"polygon": [[296, 227], [316, 220], [370, 224], [378, 256], [371, 261], [343, 252], [305, 252], [294, 249]], "text": "DEPOR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5R06", "recog_valid": false, "glyph_recog_text": "DEPOR", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[306, 254], [332, 258], [353, 256], [370, 281], [366, 292], [339, 282], [320, 291], [309, 287], [302, 272]], "text": "SOE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "SOE", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[210, 316], [333, 306], [340, 316], [314, 343], [196, 348], [193, 339]], "text": "SUBWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SUBWAY", "recog_valid": true, "glyph_recog_text": "SUBWAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114500.jpg", "caption": "a parking meter on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245577.jpg", "caption": "a plate with a sandwich and corn on it", "annotations": [{"polygon": [[0, 259], [0, 259], [27, 244], [87, 238], [106, 246], [102, 268], [0, 284]], "text": "elblock", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "celblock:", "recog_valid": false, "glyph_recog_text": "elblock", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114515.jpg", "caption": "a man on a skateboard is going through cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114521.jpg", "caption": "a dog is standing on a no dogs sign", "annotations": [{"polygon": [[99, 136], [99, 176], [132, 177], [140, 157], [136, 139], [132, 136]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "N", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[149, 136], [149, 178], [230, 178], [232, 137]], "text": "DOGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOGS", "recog_valid": true, "glyph_recog_text": "DOGS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376668.jpg", "caption": "a green bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507741.jpg", "caption": "a man with no shirt on is fixing a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507744.jpg", "caption": "a man standing in front of a table with a cake", "annotations": [{"polygon": [[326, 181], [322, 215], [336, 219], [350, 232], [362, 220], [384, 223], [392, 183], [367, 181], [358, 167], [342, 179]], "text": "LBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "81", "recog_valid": false, "glyph_recog_text": "孟", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507763.jpg", "caption": "a vase of sunflowers on a table outside", "annotations": [{"polygon": [[89, 94], [94, 90], [123, 94], [129, 124], [88, 117]], "text": "rky", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "rky", "recog_valid": true, "glyph_recog_text": "rky", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376697.jpg", "caption": "a man holding a beer bottle", "annotations": [{"polygon": [[226, 204], [254, 191], [278, 185], [283, 194], [306, 192], [306, 200], [306, 211], [296, 213], [286, 215], [279, 225], [277, 230], [264, 233], [259, 233], [251, 224], [234, 229]], "text": "Tiger", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tige!", "recog_valid": false, "glyph_recog_text": "Tiger", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376746.jpg", "caption": "a red and blue train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245675.jpg", "caption": "a man riding a snowboard down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376750.jpg", "caption": "a couple of kids standing around a bunch of mini bikes", "annotations": [{"polygon": [[131, 357], [136, 362], [162, 340], [159, 331]], "text": "Go", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOIINIII", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507827.jpg", "caption": "a train at a station with a sign that says 5", "annotations": [{"polygon": [[197, 105], [211, 105], [211, 138], [180, 137], [181, 126]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "4", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376757.jpg", "caption": "a group of people standing on a tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114624.jpg", "caption": "a building with a reflection of a street sign", "annotations": [{"polygon": [[314, 271], [316, 285], [361, 267], [361, 255]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "081X9A9", "recog_valid": false, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376785.jpg", "caption": "a giraffe standing next to a baby giraffe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376791.jpg", "caption": "a young boy is drinking from a glass of milk", "annotations": [{"polygon": [[214, 272], [347, 278], [353, 312], [215, 309]], "text": "E", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NEE", "recog_valid": false, "glyph_recog_text": "E", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114653.jpg", "caption": "a skateboarder doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114657.jpg", "caption": "a cat standing on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376802.jpg", "caption": "two pictures of a yellow trolley car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507875.jpg", "caption": "a man and a woman laying on a bunk bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507876.jpg", "caption": "a group of people standing around in a large room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245733.jpg", "caption": "angry birds cake", "annotations": [{"polygon": [[145, 351], [170, 364], [221, 381], [253, 383], [289, 380], [325, 371], [346, 363], [363, 350], [372, 342], [375, 348], [369, 362], [353, 373], [331, 386], [309, 396], [267, 402], [242, 403], [202, 396], [175, 386], [155, 376], [154, 364], [146, 357]], "text": "THEODO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ueop", "recog_valid": false, "glyph_recog_text": "THEODO", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[257, 414], [257, 434], [228, 434], [210, 431], [168, 415], [177, 401], [192, 408], [209, 412], [228, 415], [241, 416]], "text": "SMILE!!", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "3708", "recog_valid": false, "glyph_recog_text": "SMILEI!", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[378, 361], [391, 372], [383, 384], [358, 405], [324, 424], [315, 409], [336, 398], [357, 384], [368, 372]], "text": "MAKES", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "S3XAW", "recog_valid": false, "glyph_recog_text": "MAKES", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376829.jpg", "caption": "a boy holding a toothbrush in his mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245757.jpg", "caption": "a room with a desk, chair, and a lamp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376843.jpg", "caption": "a man playing tennis on a court", "annotations": [{"polygon": [[24, 372], [61, 362], [175, 426], [109, 428]], "text": "MONTREAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "TERLKO!", "recog_valid": false, "glyph_recog_text": "MONTREAL", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507936.jpg", "caption": "a blue train on the tracks near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376870.jpg", "caption": "a bicycle parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114728.jpg", "caption": "a large passenger jet flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114741.jpg", "caption": "a street sign that says stoner ave", "annotations": [{"polygon": [[130, 216], [173, 205], [276, 183], [268, 223], [133, 263], [128, 255]], "text": "Stoner", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Stoner", "recog_valid": true, "glyph_recog_text": "Stoner", "glyph_recog_ld": 1.0}, {"polygon": [[303, 163], [351, 160], [342, 200], [298, 212]], "text": "Av", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AV", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245839.jpg", "caption": "a man wearing a camouflage jacket and a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376916.jpg", "caption": "an old green truck parked in front of a wooden fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507992.jpg", "caption": "a variety of military equipment including a rifle, a knife, a knife sharpener, a knife, a knife sharpener, a knife, a knife sharpener", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376934.jpg", "caption": "a double decker bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376958.jpg", "caption": "a television set on a wooden entertainment center", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376965.jpg", "caption": "a basketball player dribbling the ball", "annotations": [{"polygon": [[30, 145], [11, 181], [124, 183], [128, 162], [46, 161], [47, 145]], "text": "Mike Hynan Hynan Coach Services", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HYNAN", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245895.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508053.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[241, 238], [240, 284], [102, 289], [100, 236]], "text": "407", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "407", "recog_valid": true, "glyph_recog_text": "407", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114844.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245935.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377007.jpg", "caption": "a desk with two computer monitors and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508092.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377059.jpg", "caption": "1936 ford pickup truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246007.jpg", "caption": "a truck full of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114937.jpg", "caption": "a person riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114943.jpg", "caption": "a man standing in front of a train with his luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246016.jpg", "caption": "a baby is playing in a crib with a toy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114945.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[66, 229], [64, 259], [182, 255], [180, 232], [156, 227], [116, 225]], "text": "Emirates", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Emirates", "recog_valid": true, "glyph_recog_text": "Emirates", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377105.jpg", "caption": "a woman laying on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377110.jpg", "caption": "a man in a chef's uniform standing next to a blender", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508191.jpg", "caption": "a motorcycle parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114979.jpg", "caption": "a man and a woman brushing their teeth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508207.jpg", "caption": "a double decker bus with a beer advertisement on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115018.jpg", "caption": "a family sitting at a table eating pizza and salad", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246099.jpg", "caption": "a backpack with a cell phone, keys, and other items", "annotations": [{"polygon": [[375, 210], [430, 223], [434, 234], [429, 240], [419, 241], [367, 228], [364, 217]], "text": "GEO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GEO", "recog_valid": true, "glyph_recog_text": "GEO", "glyph_recog_ld": 1.0}, {"polygon": [[299, 358], [293, 368], [350, 391], [355, 384]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S.8", "recog_valid": false, "glyph_recog_text": "s", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377174.jpg", "caption": "a train is parked at a train station", "annotations": [{"polygon": [[2, 71], [145, 58], [152, 102], [-1, 115]], "text": "ATISLAVA", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ATISLAVA", "recog_valid": true, "glyph_recog_text": "ATISLAVA", "glyph_recog_ld": 1.0}, {"polygon": [[165, 57], [306, 42], [312, 83], [161, 98]], "text": "HLAVNA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HLAVNA", "recog_valid": true, "glyph_recog_text": "HLAVNA", "glyph_recog_ld": 1.0}, {"polygon": [[328, 37], [504, 16], [508, 63], [327, 81], [321, 72]], "text": "STANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "STANICA", "recog_valid": false, "glyph_recog_text": "STANT", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508278.jpg", "caption": "a kitchen with a large island and stainless steel appliances", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246137.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508281.jpg", "caption": "a boy wearing lego glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508292.jpg", "caption": "a bus with a bicycle on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377226.jpg", "caption": "a green double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246156.jpg", "caption": "a bicycle with a sign that says upper crust", "annotations": [{"polygon": [[332, 146], [337, 185], [399, 183], [436, 156], [446, 135], [446, 132], [359, 139], [332, 145]], "text": "the Upper", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Upper", "recog_valid": false, "glyph_recog_text": "the Uppe", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[338, 192], [334, 230], [435, 213], [439, 186], [339, 192]], "text": "Crust", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ciust", "recog_valid": false, "glyph_recog_text": "Crust", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[441, 266], [443, 286], [498, 273], [492, 252], [441, 265]], "text": "4111", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "4111", "recog_valid": true, "glyph_recog_text": "4111", "glyph_recog_ld": 1.0}, {"polygon": [[367, 284], [373, 305], [426, 290], [423, 270], [368, 284]], "text": "497", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "497", "recog_valid": true, "glyph_recog_text": "497", "glyph_recog_ld": 1.0}, {"polygon": [[140, 306], [169, 356], [174, 355], [150, 304]], "text": "SPECIALIZED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPEELIALIT", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115086.jpg", "caption": "a refrigerator with many pictures and magnets on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115087.jpg", "caption": "a bathroom with two sinks and a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377251.jpg", "caption": "a table with a child and adults sitting around it", "annotations": [{"polygon": [[330, 325], [319, 333], [318, 340], [326, 343], [328, 348], [338, 352], [351, 367], [355, 365], [357, 353], [346, 338], [335, 325]], "text": "Spinach", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Spinuch", "recog_valid": false, "glyph_recog_text": "Spinech", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246181.jpg", "caption": "two large passenger jets parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246246.jpg", "caption": "two children on a train ride", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377329.jpg", "caption": "a cow in a pen with hay", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508403.jpg", "caption": "a young boy in a red shirt is running to catch a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508414.jpg", "caption": "a kfc sign on a pole", "annotations": [{"polygon": [[195, 242], [212, 226], [224, 217], [254, 200], [254, 196], [247, 179], [246, 178], [236, 181], [224, 190], [208, 200], [198, 210], [191, 220]], "text": "KFC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KFC", "recog_valid": true, "glyph_recog_text": "KFC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115237.jpg", "caption": "a man standing next to a truck in front of a barn", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508460.jpg", "caption": "two men in suits holding a blue tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115250.jpg", "caption": "a small outdoor pizza oven with a sign on it", "annotations": [{"polygon": [[273, 298], [343, 264], [364, 276], [296, 308]], "text": "Shafter", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shafter", "recog_valid": true, "glyph_recog_text": "Shafter", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377412.jpg", "caption": "a woman standing in front of a food stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246345.jpg", "caption": "a person riding a horse over an obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115282.jpg", "caption": "a person standing on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246356.jpg", "caption": "a cat is standing on a shelf in a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377432.jpg", "caption": "two people on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377473.jpg", "caption": "a man in a red shirt is taking a picture of a woman", "annotations": [{"polygon": [[287, 227], [299, 225], [314, 226], [335, 229], [345, 231], [357, 237], [345, 260], [331, 256], [320, 255], [300, 254], [288, 255]], "text": "DALAZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DAIR", "recog_valid": false, "glyph_recog_text": "DALAZ", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377476.jpg", "caption": "a young girl eating lunch in a classroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246412.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[408, 307], [429, 341], [442, 335], [418, 303]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[420, 303], [444, 336], [458, 330], [438, 296]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LR", "recog_valid": false, "glyph_recog_text": "1 5", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[392, 54], [400, 82], [467, 67], [463, 52]], "text": "BUDWERSIK", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Eudoeusen", "recog_valid": false, "glyph_recog_text": "BUDWVERSK", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115372.jpg", "caption": "a bus with a cartoon lion and french wooster on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246446.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115404.jpg", "caption": "a chinese restaurant with bicycles parked outside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246491.jpg", "caption": "a television in a room with boxes and boxes of books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246494.jpg", "caption": "a woman standing in a kitchen holding a wii remote", "annotations": [{"polygon": [[229, 199], [227, 239], [186, 241], [183, 199]], "text": "74", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4", "recog_valid": false, "glyph_recog_text": "74", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377585.jpg", "caption": "a man on a skateboard wearing a helmet", "annotations": [{"polygon": [[123, 195], [122, 217], [155, 228], [159, 202]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377597.jpg", "caption": "a stop sign in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508678.jpg", "caption": "a man in a boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508680.jpg", "caption": "a large airplane with propellers on the ground", "annotations": [{"polygon": [[164, 261], [160, 254], [160, 251], [161, 247], [168, 248], [169, 251], [176, 252], [198, 259], [215, 266], [226, 270], [230, 284], [190, 272], [166, 263]], "text": "CONNIE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CONNIE", "recog_valid": true, "glyph_recog_text": "CONNIE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377619.jpg", "caption": "a train is on the tracks and a plane is flying over it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115502.jpg", "caption": "a double decker bus with a man on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377649.jpg", "caption": "a wall of cuckoo clocks with various designs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246586.jpg", "caption": "a white refrigerator in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246625.jpg", "caption": "a double decker bus driving down a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115564.jpg", "caption": "a train on the tracks with a city in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377756.jpg", "caption": "a black car parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508831.jpg", "caption": "a table with scissors, twigs and other materials", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246688.jpg", "caption": "a red and green truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377760.jpg", "caption": "a man holding a tennis racquet on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246700.jpg", "caption": "thomas the tank engine by thomas the tank engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246706.jpg", "caption": "a plate with chips, dip, and carrots", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508855.jpg", "caption": "three men are standing on a tennis court holding tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115639.jpg", "caption": "firefighters in a field near a fire truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115645.jpg", "caption": "a man eating a hot dog", "annotations": [{"polygon": [[0, 90], [133, 83], [134, 145], [1, 158]], "text": "ZEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "IZEL", "recog_valid": false, "glyph_recog_text": "ZEL", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[162, 97], [208, 105], [208, 148], [160, 147]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HOT", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246723.jpg", "caption": "a cat sitting on a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508870.jpg", "caption": "a white truck with a red and white trailer", "annotations": [{"polygon": [[65, 65], [168, 122], [166, 146], [65, 97]], "text": "AMMOET", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ANMOET", "recog_valid": false, "glyph_recog_text": "AMMOET", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115654.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[131, 300], [242, 272], [238, 262], [131, 287]], "text": "HORTICULTURAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HORTICULTURAL", "recog_valid": true, "glyph_recog_text": "HORTICULTURAL", "glyph_recog_ld": 1.0}, {"polygon": [[218, 309], [306, 290], [305, 275], [216, 295]], "text": "CATHEDRAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CATHEDRAL", "recog_valid": true, "glyph_recog_text": "CATHEDRAL", "glyph_recog_ld": 1.0}, {"polygon": [[363, 392], [361, 403], [467, 450], [468, 437]], "text": "BUCKINGHAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "BUCKINGRNAALACE", "recog_valid": false, "glyph_recog_text": "BUCKINOHAM", "glyph_recog_ld": 0.4666670222219852}, {"polygon": [[180, 103], [181, 115], [262, 175], [262, 162]], "text": "WESTMINSTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WESTMINSTER", "recog_valid": true, "glyph_recog_text": "WESTMINSTER", "glyph_recog_ld": 1.0}, {"polygon": [[177, 235], [177, 235], [173, 248], [228, 225], [226, 216]], "text": "ARCHIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARCHIVE", "recog_valid": true, "glyph_recog_text": "ARQ时纱装", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[185, 213], [189, 223], [264, 191], [254, 182]], "text": "WESTMINSTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WESTMINSTE", "recog_valid": false, "glyph_recog_text": "WESTMINSTER", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508881.jpg", "caption": "an airplane parked at an airport with luggage carts", "annotations": [{"polygon": [[268, 288], [351, 301], [340, 323], [258, 305], [259, 299], [262, 293]], "text": "FINNAIR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FINNRIR", "recog_valid": false, "glyph_recog_text": "FINNAIR", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246750.jpg", "caption": "a pepsi sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246753.jpg", "caption": "a man holding a dog in his arms", "annotations": [{"polygon": [[0, 298], [0, 376], [46, 369], [38, 293]], "text": "ge", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "老", "recog_valid": false, "glyph_recog_text": "oa", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[52, 282], [174, 333], [144, 396], [44, 344]], "text": "County", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ouny", "recog_valid": false, "glyph_recog_text": "County", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246755.jpg", "caption": "soccer players in pink and green uniforms playing a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246759.jpg", "caption": "a blue bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246779.jpg", "caption": "a tray with donuts and drinks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246787.jpg", "caption": "a sign hanging on a wall with a tie and a tie holder", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115718.jpg", "caption": "a street with a sign for a pizza place", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377878.jpg", "caption": "a steam locomotive is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246848.jpg", "caption": "a large orange truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115816.jpg", "caption": "a street sign with a stop sign and a court sign", "annotations": [{"polygon": [[88, 142], [150, 163], [149, 192], [86, 172]], "text": "Central", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Central", "recog_valid": true, "glyph_recog_text": "Central", "glyph_recog_ld": 1.0}, {"polygon": [[80, 206], [141, 199], [141, 229], [80, 236]], "text": "Court", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Court", "recog_valid": true, "glyph_recog_text": "Court", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509047.jpg", "caption": "an old black and white photo of people on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246923.jpg", "caption": "a red scooter in a window display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115854.jpg", "caption": "a soccer player is jumping to block the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115862.jpg", "caption": "a group of people standing in front of a large air force plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509087.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509098.jpg", "caption": "a man standing next to a motorcycle in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378030.jpg", "caption": "a traffic light is on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246970.jpg", "caption": "a baseball player sliding into home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246982.jpg", "caption": "four blue and yellow jets flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247006.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115939.jpg", "caption": "three baseball players standing in a circle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378092.jpg", "caption": "a man playing a wii game", "annotations": [{"polygon": [[306, 172], [304, 187], [361, 165], [361, 151], [307, 172]], "text": "PROSTORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PTISTE", "recog_valid": false, "glyph_recog_text": "PROSTORE", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[123, 351], [152, 397], [228, 362], [216, 315], [193, 322], [123, 351]], "text": "Wii MOVE YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wii", "recog_valid": false, "glyph_recog_text": "WMIMORVEYOU", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115959.jpg", "caption": "a man on a skateboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247052.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115992.jpg", "caption": "a series of pictures of a baseball pitcher pitching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116004.jpg", "caption": "a crowd of people standing in a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378155.jpg", "caption": "a street sign with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116026.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247110.jpg", "caption": "a yellow train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116040.jpg", "caption": "four jets flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378189.jpg", "caption": "a cat wearing a tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247126.jpg", "caption": "a baseball player running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378214.jpg", "caption": "a woman is showing a cow and sheep at a show", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247146.jpg", "caption": "a baseball game with a batter and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247177.jpg", "caption": "a street sign with a dog on it", "annotations": [{"polygon": [[99, 222], [114, 222], [123, 227], [126, 234], [144, 233], [185, 234], [184, 276], [181, 282], [174, 282], [173, 280], [174, 276], [177, 274], [177, 270], [98, 267]], "text": "Dog", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dog", "recog_valid": true, "glyph_recog_text": "Dog", "glyph_recog_ld": 1.0}, {"polygon": [[222, 223], [212, 228], [208, 242], [208, 257], [214, 267], [223, 271], [298, 271], [310, 273], [318, 270], [320, 268], [323, 258], [322, 246], [317, 240], [314, 238], [273, 237], [251, 236], [236, 234], [232, 227]], "text": "Gone", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gone", "recog_valid": true, "glyph_recog_text": "Gone", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509321.jpg", "caption": "a couple of buses and a truck driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378268.jpg", "caption": "a black and white photo of an airplane parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378278.jpg", "caption": "a basket of fruit with a light shining on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509350.jpg", "caption": "a motorcycle parked at a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509364.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247224.jpg", "caption": "a person is playing tennis on a court", "annotations": [{"polygon": [[154, 140], [158, 135], [197, 164], [193, 169]], "text": "HEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "品物", "recog_valid": false, "glyph_recog_text": "risd", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378313.jpg", "caption": "a black and white photo of a cow in a pen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116185.jpg", "caption": "a large airplane parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509406.jpg", "caption": "a jet fighter on a metal stand in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116196.jpg", "caption": "a boat on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378344.jpg", "caption": "a stop sign with a triangle on it", "annotations": [{"polygon": [[166, 219], [336, 187], [348, 220], [317, 253], [170, 271]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509442.jpg", "caption": "a woman holding a tennis racket in a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116229.jpg", "caption": "a bus driving down a street in front of a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247320.jpg", "caption": "a green train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509482.jpg", "caption": "a white cake on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247346.jpg", "caption": "a bus stop with a bus parked next to it", "annotations": [{"polygon": [[20, 402], [45, 436], [58, 443], [78, 444], [231, 421], [202, 405], [216, 401], [216, 396], [192, 388], [179, 386], [166, 386], [120, 392], [103, 392], [27, 398]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOR", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378419.jpg", "caption": "a horse and rider jumping over an obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247348.jpg", "caption": "a bathroom with graffiti on the wall and a sink", "annotations": [{"polygon": [[343, 115], [345, 124], [382, 97], [379, 90]], "text": "EARTHWORM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CARFHORN", "recog_valid": false, "glyph_recog_text": "EPeS", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[213, 70], [218, 75], [252, 51], [249, 43]], "text": "WELCOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "W6LCorC", "recog_valid": false, "glyph_recog_text": "wWilcun", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[169, 97], [121, 144], [129, 150], [180, 102], [177, 100]], "text": "SUTHERLAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SuTHerLAND", "recog_valid": false, "glyph_recog_text": "SUTHERLAND", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[109, 141], [114, 150], [141, 124], [132, 116]], "text": "MARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MARK", "recog_valid": true, "glyph_recog_text": "MARK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116286.jpg", "caption": "a group of kids playing a video game", "annotations": [{"polygon": [[237, 201], [237, 250], [263, 253], [279, 247], [291, 195], [268, 199]], "text": "00", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "or", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247360.jpg", "caption": "a skateboarder doing a trick on a ramp at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509509.jpg", "caption": "a man and a child eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378458.jpg", "caption": "a man with long hair and a blue shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116334.jpg", "caption": "a man is reaching into a refrigerator", "annotations": [{"polygon": [[84, 242], [85, 288], [122, 261], [114, 236]], "text": "Sim", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Si", "recog_valid": false, "glyph_recog_text": "留", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116339.jpg", "caption": "a woman playing tennis on a blue court", "annotations": [{"polygon": [[175, 150], [175, 150], [166, 184], [251, 174], [251, 152]], "text": "Corona", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Corona", "recog_valid": true, "glyph_recog_text": "Corona", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509555.jpg", "caption": "a table with bottles and a candle on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247416.jpg", "caption": "two men in suits feeding each other cake", "annotations": [{"polygon": [[146, 121], [145, 86], [388, 85], [388, 118]], "text": "specialties", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Lhecialties", "recog_valid": false, "glyph_recog_text": "specialties", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378499.jpg", "caption": "a man laying on the floor next to a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509579.jpg", "caption": "a bird standing on the side of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509584.jpg", "caption": "a man on a motorcycle with a car behind him", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378521.jpg", "caption": "four women sitting on a bench with tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247459.jpg", "caption": "a traffic light with a sign has texts", "annotations": [{"polygon": [[134, 425], [139, 471], [147, 471], [152, 452], [160, 453], [167, 454], [176, 454], [195, 458], [216, 466], [219, 448], [218, 445], [211, 434], [207, 429], [190, 427], [190, 415], [187, 416], [187, 432], [166, 431], [165, 414], [163, 414], [162, 424], [163, 430], [155, 428], [145, 417]], "text": "ogretmenkrdir", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ieonilediy", "recog_valid": false, "glyph_recog_text": "cyebratd", "glyph_recog_ld": 0.1000008999991}, {"polygon": [[153, 382], [154, 402], [169, 404], [183, 425], [186, 425], [185, 407], [193, 411], [193, 408], [192, 386], [185, 380], [169, 383], [170, 366], [167, 365], [166, 379]], "text": "ortaya", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "ortaya", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378541.jpg", "caption": "a cat sitting on top of a book shelf", "annotations": [{"polygon": [[49, 225], [71, 222], [104, 350], [82, 355]], "text": "TRUFFAUT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LNUIINHL", "recog_valid": false, "glyph_recog_text": "TRUFFAUT", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[84, 363], [103, 359], [120, 427], [101, 429]], "text": "COCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "HOOS", "recog_valid": false, "glyph_recog_text": "COCK", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[151, 333], [171, 330], [144, 215], [123, 221]], "text": "FLORENCEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLORENCE", "recog_valid": false, "glyph_recog_text": "FLORENCEE", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[150, 335], [170, 331], [183, 386], [164, 393]], "text": " ETLA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ETLA", "recog_valid": false, "glyph_recog_text": "ETLA", "glyph_recog_ld": 1.0}, {"polygon": [[50, 425], [40, 385], [66, 383], [76, 428]], "text": "CCS", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "SD9", "recog_valid": false, "glyph_recog_text": "ccs", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[304, 300], [393, 430], [400, 423], [312, 292]], "text": "REPRESESNATION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Uni5alIHONV.", "recog_valid": false, "glyph_recog_text": "ROPRESESNATION", "glyph_recog_ld": 7.142852040953329e-07}, {"polygon": [[251, 310], [277, 425], [313, 420], [287, 304]], "text": "BIG BOOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIGBOOK", "recog_valid": false, "glyph_recog_text": "BIG BOOK", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[224, 326], [241, 323], [261, 409], [248, 416]], "text": "COMPAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SVEWOO", "recog_valid": false, "glyph_recog_text": "COMPAS", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116402.jpg", "caption": "a family playing wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247475.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116423.jpg", "caption": "a train traveling down the tracks with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116431.jpg", "caption": "a man is playing tennis on a court", "annotations": [{"polygon": [[85, 221], [396, 224], [395, 294], [85, 288]], "text": "PARBAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PAR:BAS", "recog_valid": false, "glyph_recog_text": "PARBAS", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247508.jpg", "caption": "a person laying in bed with a blanket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378586.jpg", "caption": "a plane parked on the tarmac with the wing in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509682.jpg", "caption": "people are walking around a train station with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247545.jpg", "caption": "a tennis player on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378621.jpg", "caption": "a group of people playing frisbee on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247553.jpg", "caption": "a baby is looking at a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378628.jpg", "caption": "a brown cow standing in the grass next to a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509700.jpg", "caption": "a table with oranges and bananas for sale", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378638.jpg", "caption": "a person holding a lit candle next to a pizza with bacon on it", "annotations": [{"polygon": [[199, 303], [374, 309], [358, 376], [180, 374]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "chopPy", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[188, 360], [384, 359], [374, 435], [174, 436]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Bdlng", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247597.jpg", "caption": "a man sitting on a train with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247603.jpg", "caption": "camarillo farmers market january 19th", "annotations": [{"polygon": [[16, 3], [511, 5], [513, 59], [497, 65], [8, 63], [0, 56], [-1, 12]], "text": "CAMARILLO'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CAMARM LO'S", "recog_valid": false, "glyph_recog_text": "CAMARILLO'S", "glyph_recog_ld": 0.8181819834709241}, {"polygon": [[-1, 76], [-1, 139], [397, 140], [412, 130], [414, 118], [410, 83], [394, 76]], "text": "FARMER", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FARMER'S", "recog_valid": false, "glyph_recog_text": "FARMER", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[0, 152], [1, 215], [324, 216], [325, 168], [339, 168], [339, 152]], "text": "MARKET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARKET", "recog_valid": true, "glyph_recog_text": "MARKET", "glyph_recog_ld": 1.0}, {"polygon": [[16, 228], [258, 229], [268, 237], [271, 249], [264, 269], [251, 292], [3, 292], [1, 278], [12, 274], [16, 270]], "text": "jan 19", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JAN19", "recog_valid": false, "glyph_recog_text": "jan 19", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[1, 359], [12, 367], [262, 366], [251, 341], [261, 329], [256, 314], [246, 305], [6, 303], [-1, 312]], "text": "CLEAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLEAR", "recog_valid": true, "glyph_recog_text": "CLEAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247631.jpg", "caption": "a clock on a table", "annotations": [{"polygon": [[206, 221], [206, 221], [252, 212], [256, 221], [250, 254], [205, 261]], "text": "22", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "22", "recog_valid": true, "glyph_recog_text": "22", "glyph_recog_ld": 1.0}, {"polygon": [[268, 210], [313, 202], [317, 244], [267, 251]], "text": ":05", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "05", "recog_valid": false, "glyph_recog_text": ":05", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116570.jpg", "caption": "a desk with a computer, monitor, and keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509804.jpg", "caption": "a bench sitting in the middle of a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116592.jpg", "caption": "a man on a motorbike is riding down the street", "annotations": [{"polygon": [[171, 101], [198, 89], [231, 87], [234, 106], [172, 118]], "text": "DUONG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "DUONG", "recog_valid": true, "glyph_recog_text": "DUONG", "glyph_recog_ld": 1.0}, {"polygon": [[149, 135], [183, 127], [184, 152], [148, 158]], "text": "PHAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PHAN", "recog_valid": true, "glyph_recog_text": "PHAN", "glyph_recog_ld": 1.0}, {"polygon": [[216, 120], [241, 110], [259, 113], [259, 138], [217, 145]], "text": "CHAU", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CHAU", "recog_valid": true, "glyph_recog_text": "CHAU", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247680.jpg", "caption": "a desk with a laptop and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247703.jpg", "caption": "a green fire hydrant on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378778.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[79, 266], [77, 294], [99, 296], [114, 269]], "text": "27", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "27", "recog_valid": true, "glyph_recog_text": "27", "glyph_recog_ld": 1.0}, {"polygon": [[266, 215], [266, 239], [306, 224], [294, 206]], "text": "SILLA 46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "46", "recog_valid": false, "glyph_recog_text": "总桂上大餐", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509859.jpg", "caption": "a city street with cars and buses on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378790.jpg", "caption": "a white plate with fruit on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247718.jpg", "caption": "a young boy sitting at a table with a plate of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116656.jpg", "caption": "a stop sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509876.jpg", "caption": "a group of people sitting on a bench in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378821.jpg", "caption": "a blue double decker bus", "annotations": [{"polygon": [[39, 167], [35, 196], [36, 228], [62, 206], [85, 201], [94, 217], [94, 244], [123, 231], [148, 203], [164, 184], [192, 177], [211, 175], [224, 186], [234, 189], [254, 120], [241, 111], [217, 102], [160, 117], [155, 124], [151, 140], [145, 140], [141, 142], [134, 153], [133, 166], [95, 176], [93, 138], [70, 140], [55, 153]], "text": "3899799", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "pngi7ig", "recog_valid": false, "glyph_recog_text": "3899799", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378827.jpg", "caption": "a man is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378848.jpg", "caption": "a red bicycle leaning against a wall of green plants", "annotations": [{"polygon": [[180, 386], [246, 439], [235, 443], [170, 390]], "text": "RALEIGH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PLECY", "recog_valid": false, "glyph_recog_text": "RA4EIGM", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378849.jpg", "caption": "a wall with clothes hanging on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116708.jpg", "caption": "a woman on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378865.jpg", "caption": "a train traveling on a bridge over a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509939.jpg", "caption": "a small airplane parked on the dock next to a mountain range", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116732.jpg", "caption": "a railroad crossing sign on a street corner", "annotations": [{"polygon": [[382, 225], [412, 260], [406, 264], [377, 229]], "text": "RAILROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RATERROAD", "recog_valid": false, "glyph_recog_text": "aaose", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[408, 227], [412, 232], [379, 262], [375, 256]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116735.jpg", "caption": "a person holding a cell phone", "annotations": [{"polygon": [[415, 349], [433, 363], [460, 333], [443, 319]], "text": "IBM", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IBM", "recog_valid": true, "glyph_recog_text": "IBM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116741.jpg", "caption": "a street sign that says mel torre way", "annotations": [{"polygon": [[173, 196], [172, 219], [262, 220], [261, 190]], "text": "TORME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TORME", "recog_valid": true, "glyph_recog_text": "TORME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116755.jpg", "caption": "a person standing on a skateboard with their feet on the board", "annotations": [{"polygon": [[95, 338], [120, 353], [189, 302], [153, 286]], "text": "DGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "590", "recog_valid": false, "glyph_recog_text": "DGE", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378906.jpg", "caption": "people riding bicycles on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378907.jpg", "caption": "a little girl eating cake", "annotations": [{"polygon": [[464, 255], [461, 283], [513, 292], [514, 262]], "text": "194", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "192", "recog_valid": false, "glyph_recog_text": "194", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116771.jpg", "caption": "a truck is parked next to a street with cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247866.jpg", "caption": "a young boy holding a baseball bat in front of a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510012.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116827.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247909.jpg", "caption": "a man standing behind a table with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378983.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510063.jpg", "caption": "a man and woman are in the kitchen of their camper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116851.jpg", "caption": "a clock is on a pole in front of a building", "annotations": [{"polygon": [[103, 53], [99, 79], [215, 112], [218, 89]], "text": "GOODERHAM", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "GOODERHAM", "recog_valid": true, "glyph_recog_text": "GOODERHAM", "glyph_recog_ld": 1.0}, {"polygon": [[234, 95], [232, 116], [278, 129], [281, 109]], "text": "WORTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WORTS", "recog_valid": true, "glyph_recog_text": "WORTS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379024.jpg", "caption": "a stop sign with a street sign on it", "annotations": [{"polygon": [[288, 150], [287, 125], [356, 137], [356, 162], [323, 155], [323, 165], [319, 165], [316, 155]], "text": "Regent", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Regent", "recog_valid": true, "glyph_recog_text": "Regen", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[220, 216], [338, 229], [340, 248], [342, 288], [316, 286], [222, 278]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OTOP", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[240, 181], [248, 213], [328, 220], [328, 220], [310, 184]], "text": "22", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "22", "recog_valid": true, "glyph_recog_text": "2 2", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379037.jpg", "caption": "a street sign with a building in the background", "annotations": [{"polygon": [[306, 215], [305, 249], [449, 248], [448, 227], [419, 217], [307, 218]], "text": "boezio", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "boezio", "recog_valid": true, "glyph_recog_text": "boezio", "glyph_recog_ld": 1.0}, {"polygon": [[83, 293], [82, 313], [151, 316], [151, 298], [138, 288], [106, 286], [86, 292]], "text": "rotonda", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "rotonda", "recog_valid": true, "glyph_recog_text": "rotonda", "glyph_recog_ld": 1.0}, {"polygon": [[171, 300], [170, 324], [180, 324], [180, 318], [201, 319], [200, 294], [196, 294], [173, 300]], "text": "goti", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "goti", "recog_valid": true, "glyph_recog_text": "got", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247971.jpg", "caption": "a yellow airplane parked on a brick road", "annotations": [{"polygon": [[396, 278], [433, 269], [433, 269], [424, 297], [401, 300]], "text": "57", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "57", "recog_valid": true, "glyph_recog_text": "57", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248002.jpg", "caption": "a woman is serving doughnuts at a food stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379093.jpg", "caption": "a baseball player swinging a bat on a field", "annotations": [{"polygon": [[312, 248], [326, 217], [346, 201], [321, 176], [306, 190], [283, 228]], "text": "CASTRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAS7M", "recog_valid": false, "glyph_recog_text": "CASTRO", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[352, 212], [364, 238], [391, 274], [376, 294], [348, 298], [314, 265], [316, 251], [345, 217]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116956.jpg", "caption": "a propeller plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248031.jpg", "caption": "a baseball player holding a bat in a batting cage", "annotations": [{"polygon": [[386, 249], [391, 267], [394, 258], [401, 260], [411, 263], [429, 276], [434, 246], [421, 233], [402, 235]], "text": "ANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ANE", "recog_valid": true, "glyph_recog_text": "ANE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248066.jpg", "caption": "a man walking on the side of the road next to a traffic sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379159.jpg", "caption": "a black and white photo of two boats in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248091.jpg", "caption": "a dog sitting on a bed with a cat sitting on the bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510243.jpg", "caption": "a train traveling down the tracks near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510246.jpg", "caption": "a woman standing in front of a microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117034.jpg", "caption": "a man in a dark room with a laptop and headphones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510250.jpg", "caption": "a teddy bear and a bottle of liquor", "annotations": [{"polygon": [[364, 328], [357, 328], [358, 315], [365, 304], [377, 297], [391, 295], [405, 300], [415, 313], [411, 316], [406, 315], [402, 308], [398, 307], [390, 304], [384, 304], [379, 306], [374, 308], [368, 314]], "text": "FENTIMAAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "FENTILAAS", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248108.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[105, 198], [105, 191], [158, 217], [159, 224]], "text": "AVENUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NENUE", "recog_valid": false, "glyph_recog_text": "srksth", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[38, 168], [37, 185], [110, 217], [109, 203]], "text": "EMILE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EMILE", "recog_valid": true, "glyph_recog_text": "EMILE", "glyph_recog_ld": 1.0}, {"polygon": [[121, 209], [122, 223], [222, 268], [222, 254]], "text": "DUPLOYE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DUPLOYE", "recog_valid": true, "glyph_recog_text": "DUPLOYE", "glyph_recog_ld": 1.0}, {"polygon": [[295, 241], [460, 157], [464, 180], [297, 263]], "text": "SHERBROOKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHERBROOKE", "recog_valid": true, "glyph_recog_text": "SHERBROOKE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248133.jpg", "caption": "a cat is sleeping in a suitcase", "annotations": [{"polygon": [[44, 62], [45, 114], [194, 114], [191, 66]], "text": "PACKD", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "PACKD", "recog_valid": true, "glyph_recog_text": "PACKD", "glyph_recog_ld": 1.0}, {"polygon": [[201, 63], [203, 114], [262, 115], [260, 64]], "text": "UR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UR", "recog_valid": true, "glyph_recog_text": "UR", "glyph_recog_ld": 1.0}, {"polygon": [[271, 64], [273, 113], [495, 115], [495, 64]], "text": "SUTECASE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "SUTECASE", "recog_valid": true, "glyph_recog_text": "SUTECASE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379211.jpg", "caption": "a group of people posing for a photo with a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510304.jpg", "caption": "a no parking sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248191.jpg", "caption": "four packets of cereal sitting on a counter next to a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117121.jpg", "caption": "three men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510342.jpg", "caption": "a man in a business suit is standing in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379272.jpg", "caption": "a skateboarder is doing a trick in a bowl", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248204.jpg", "caption": "a large jet airplane flying low over a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117157.jpg", "caption": "a bus with a red and white design on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510391.jpg", "caption": "a group of people on a beach with a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248263.jpg", "caption": "a man sitting on a bench in a market", "annotations": [{"polygon": [[333, 421], [368, 430], [370, 397], [330, 389]], "text": "IPOd", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "iPod", "recog_valid": false, "glyph_recog_text": "IPOd", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[331, 436], [329, 459], [365, 467], [367, 438]], "text": "NDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "NDS", "recog_valid": true, "glyph_recog_text": "NDS", "glyph_recog_ld": 1.0}, {"polygon": [[330, 464], [325, 496], [355, 503], [363, 475]], "text": "PSP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PSP", "recog_valid": true, "glyph_recog_text": "PSP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510414.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379350.jpg", "caption": "a man and a boy sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379349.jpg", "caption": "a stuffed animal is sitting on top of a bed", "annotations": [{"polygon": [[73, 371], [63, 377], [63, 377], [137, 404], [151, 393]], "text": "comfortable!", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "comfortable.", "recog_valid": false, "glyph_recog_text": "comfortable!", "glyph_recog_ld": 0.9166667361110532}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510434.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117230.jpg", "caption": "a man wearing a green hat", "annotations": [{"polygon": [[276, 199], [274, 230], [331, 225], [330, 190]], "text": "SON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MCON", "recog_valid": false, "glyph_recog_text": "SON", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117283.jpg", "caption": "1953 bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa bsa", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248389.jpg", "caption": "a man sitting at a table with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510536.jpg", "caption": "a train on the tracks with a yellow and white train", "annotations": [{"polygon": [[278, 232], [278, 266], [294, 265], [300, 262], [300, 274], [303, 274], [303, 253], [327, 239], [326, 220], [325, 199], [303, 219]], "text": "boxXoress.de", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "bordorea do", "glyph_recog_ld": 9.090900826569381e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510538.jpg", "caption": "a street sign on a pole in front of a church", "annotations": [{"polygon": [[308, 338], [313, 352], [360, 316], [350, 305]], "text": "WALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WALL", "recog_valid": true, "glyph_recog_text": "WALL", "glyph_recog_ld": 1.0}, {"polygon": [[140, 285], [140, 285], [137, 301], [219, 334], [222, 320]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROAONAN", "recog_valid": false, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[301, 259], [301, 259], [303, 269], [343, 235], [338, 227]], "text": "WALLs", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "32260272", "recog_valid": false, "glyph_recog_text": "别产三上t", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[144, 331], [143, 347], [164, 349], [190, 356], [218, 363], [220, 348]], "text": "BROADWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BROA", "recog_valid": false, "glyph_recog_text": "BROADWAY", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510540.jpg", "caption": "a group of men standing in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117327.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117336.jpg", "caption": "american airlines to cut flights to new york city", "annotations": [{"polygon": [[17, 210], [111, 216], [130, 220], [144, 215], [154, 219], [194, 221], [222, 226], [267, 224], [317, 228], [342, 228], [403, 125], [391, 120], [367, 123], [337, 122], [285, 120], [281, 103], [267, 103], [243, 121], [213, 126], [186, 122], [121, 122], [119, 108], [106, 108], [72, 130], [43, 161], [22, 198]], "text": "American", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Aiennean", "recog_valid": false, "glyph_recog_text": "American", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379498.jpg", "caption": "a woman using a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248461.jpg", "caption": "a cat laying on a table with a cell phone and a coffee cup", "annotations": [{"polygon": [[412, 63], [414, 90], [463, 94], [512, 90], [510, 64]], "text": "DOUB", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "DOUB", "recog_valid": true, "glyph_recog_text": "DOUB", "glyph_recog_ld": 1.0}, {"polygon": [[415, 94], [417, 141], [511, 138], [512, 95]], "text": "GUL", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "GUI", "recog_valid": false, "glyph_recog_text": "GUL", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248471.jpg", "caption": "a white and blue train sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248478.jpg", "caption": "a woman sitting at a table with two trays of food", "annotations": [{"polygon": [[91, 404], [123, 309], [138, 312], [117, 404]], "text": "THE WORKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WORKS", "recog_valid": false, "glyph_recog_text": "THE WORKS", "glyph_recog_ld": 0.5555560493821674}, {"polygon": [[362, 305], [389, 372], [406, 369], [380, 305]], "text": "WORKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SH34A", "recog_valid": false, "glyph_recog_text": "WORKS", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117413.jpg", "caption": "a man on a motorcycle", "annotations": [{"polygon": [[32, 155], [31, 115], [142, 104], [142, 150]], "text": "SATUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SATUR", "recog_valid": true, "glyph_recog_text": "SATUR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248496.jpg", "caption": "a stop sign with a 4 way sign on it", "annotations": [{"polygon": [[170, 280], [174, 200], [394, 188], [396, 270]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[259, 369], [258, 411], [330, 413], [329, 371]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379578.jpg", "caption": "a woman on a motorcycle with a sidecar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510652.jpg", "caption": "a laptop computer, a remote control, a passport, and a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379593.jpg", "caption": "a large room with many different types of furniture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510665.jpg", "caption": "a white bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510671.jpg", "caption": "a man in a wetsuit holding a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510676.jpg", "caption": "two men sitting at a table with food and drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510680.jpg", "caption": "a green apple and a bunch of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379612.jpg", "caption": "a cat sitting on top of a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379613.jpg", "caption": "a female soccer player in white and green uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248565.jpg", "caption": "a yellow school bus is parked at the intersection of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379649.jpg", "caption": "a girl is holding a tennis racket on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117514.jpg", "caption": "a red truck with a white truck behind it", "annotations": [{"polygon": [[263, 230], [310, 249], [303, 277], [266, 261]], "text": "VMC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VMC", "recog_valid": true, "glyph_recog_text": "VMC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117536.jpg", "caption": "a yellow and blue fire hydrant in the middle of a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510757.jpg", "caption": "a clock is shown in front of a no passing zone sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510768.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510771.jpg", "caption": "a young boy holding a baseball bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510781.jpg", "caption": "a clock with a bird on it that says love is like a butterfly", "annotations": [{"polygon": [[371, 458], [368, 512], [412, 512], [413, 459]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "4", "recog_valid": true, "glyph_recog_text": "4", "glyph_recog_ld": 1.0}, {"polygon": [[60, 167], [61, 209], [262, 210], [260, 168]], "text": "LOVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UOMG", "recog_valid": false, "glyph_recog_text": "LOVE", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[413, 320], [414, 388], [455, 380], [451, 318]], "text": "3", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "M", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}, {"polygon": [[377, 192], [377, 253], [417, 251], [414, 188]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CN", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[134, 57], [135, 117], [188, 119], [189, 54]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[7, 91], [10, 152], [52, 152], [50, 89]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "二", "recog_valid": false, "glyph_recog_text": "一、", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248637.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379711.jpg", "caption": "a busy city street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510790.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248649.jpg", "caption": "a desk with a variety of items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510799.jpg", "caption": "a steam train traveling down the tracks near a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379736.jpg", "caption": "a chocolate donut and a cup of coffee on a magazine", "annotations": [{"polygon": [[192, 155], [184, 182], [211, 189], [278, 192], [282, 174], [252, 159], [222, 162]], "text": "optim", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "optim", "recog_valid": true, "glyph_recog_text": "optim", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510812.jpg", "caption": "a man holding a pink umbrella", "annotations": [{"polygon": [[96, 307], [93, 319], [92, 335], [101, 339], [183, 349], [215, 328], [203, 321], [186, 304], [175, 307], [156, 314], [147, 301], [124, 294], [102, 305]], "text": "Docotny", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Docotny", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[376, 317], [403, 314], [415, 303], [420, 302], [416, 312], [411, 317], [426, 319], [422, 333], [408, 346], [402, 348], [398, 343], [405, 336], [394, 335], [385, 342], [377, 338], [377, 333], [369, 335], [363, 335], [360, 344], [348, 348], [345, 348], [335, 342], [346, 335], [347, 335], [351, 330], [352, 320], [352, 320], [352, 320]], "text": "graphy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "4206", "recog_valid": false, "glyph_recog_text": "graphy", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117601.jpg", "caption": "two people holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510820.jpg", "caption": "a table with a variety of food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379754.jpg", "caption": "a stop sign on a wooden post", "annotations": [{"polygon": [[226, 126], [226, 126], [239, 127], [346, 174], [349, 241], [228, 211]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379766.jpg", "caption": "a truck is driving down the street with traffic", "annotations": [{"polygon": [[320, 239], [330, 255], [381, 220], [386, 225], [391, 220], [388, 216], [397, 209], [399, 215], [405, 210], [402, 207], [424, 190], [413, 175], [350, 217], [343, 214], [322, 238]], "text": "schepps the dairy best", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "schepps", "recog_valid": false, "glyph_recog_text": "chepps the dairy bes", "glyph_recog_ld": 0.25000037499981254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248694.jpg", "caption": "a man standing in front of a table with bunches of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510860.jpg", "caption": "a woman sitting on a bench with a hat on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248730.jpg", "caption": "a table with two cakes and a plate with a cherry", "annotations": [{"polygon": [[164, 330], [155, 345], [209, 406], [217, 395], [221, 391]], "text": "PRIZE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRIZE", "recog_valid": false, "glyph_recog_text": "PRIZE", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248745.jpg", "caption": "a man jumping in the air to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379820.jpg", "caption": "a group of kids sitting around a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248755.jpg", "caption": "a black cat laying on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248769.jpg", "caption": "a set of scissors with a picture of a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379871.jpg", "caption": "a man reading to a baby", "annotations": [{"polygon": [[204, 201], [202, 234], [236, 232], [233, 204]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "R", "recog_valid": true, "glyph_recog_text": "R", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248809.jpg", "caption": "a group of people on a boat watching an elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117743.jpg", "caption": "a stop sign with stickers on it", "annotations": [{"polygon": [[167, 281], [314, 267], [313, 211], [175, 228]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510976.jpg", "caption": "two cell phones sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117781.jpg", "caption": "people are sitting in boats with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117784.jpg", "caption": "a kitchen with a microwave, refrigerator, and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117789.jpg", "caption": "a dog in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379936.jpg", "caption": "the sign for the bb ranch home is on the corner of the street", "annotations": [{"polygon": [[264, 82], [251, 112], [257, 115], [266, 111], [330, 104], [335, 79]], "text": "Local", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Local!", "recog_valid": false, "glyph_recog_text": "Local", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[211, 110], [204, 136], [221, 135], [242, 132], [259, 140], [265, 139], [284, 129], [354, 121], [355, 116], [349, 105], [268, 113], [247, 113], [229, 109]], "text": "Burgerman", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Buigeiman", "recog_valid": false, "glyph_recog_text": "Burgerman", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117796.jpg", "caption": "a city street with cars and people walking on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379941.jpg", "caption": "a man holding a donut in front of a donut shop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248876.jpg", "caption": "a cat sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248879.jpg", "caption": "a group of men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379965.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511056.jpg", "caption": "a red and white fishing boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248925.jpg", "caption": "a man in yellow and black is riding a wave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511075.jpg", "caption": "a white suv parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248948.jpg", "caption": "a giraffe and zebras in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380039.jpg", "caption": "a cat is sitting on a motorcycle parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117922.jpg", "caption": "a stop sign with a vehicle at a time sign", "annotations": [{"polygon": [[232, 170], [234, 218], [331, 216], [350, 187], [345, 168]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511148.jpg", "caption": "a cat is laying in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117937.jpg", "caption": "two men are jumping on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511157.jpg", "caption": "a delta airlines plane is seen at the airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117961.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[219, 136], [258, 101], [261, 125], [221, 158]], "text": "CFDAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CEDAR", "recog_valid": false, "glyph_recog_text": "CFDAR", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[177, 144], [249, 156], [248, 183], [177, 172]], "text": "SUMMIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SUMMIT", "recog_valid": true, "glyph_recog_text": "SUMMIT", "glyph_recog_ld": 1.0}, {"polygon": [[198, 275], [277, 231], [280, 316], [202, 339]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "营", "recog_valid": false, "glyph_recog_text": "0-0", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511179.jpg", "caption": "a group of people sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380115.jpg", "caption": "a white car is parked at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380128.jpg", "caption": "a laptop computer sitting on a table with a mouse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511218.jpg", "caption": "a fighter jet flying in the sky with its landing gear down", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380148.jpg", "caption": "a man is holding a bag of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511231.jpg", "caption": "a man on a bike on a bridge with a city in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380163.jpg", "caption": "a red and white airplane flying in the sky", "annotations": [{"polygon": [[322, 225], [370, 127], [376, 140], [320, 250], [318, 248]], "text": "jet2.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "par2.co1m", "recog_valid": false, "glyph_recog_text": "iwst.cam", "glyph_recog_ld": 0.333334074073251}, {"polygon": [[188, 259], [180, 305], [295, 293], [298, 254]], "text": "YORKSHIRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TGRKSYRe", "recog_valid": false, "glyph_recog_text": "YORKSHIRE", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380171.jpg", "caption": "a dog sitting in the back of a camper van", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511249.jpg", "caption": "two women sitting on the ground with books and teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249111.jpg", "caption": "a laptop computer on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511255.jpg", "caption": "a bathroom with a toilet, sink and two pictures on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249120.jpg", "caption": "a man walking on the street with a snow shovel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249128.jpg", "caption": "a picture of a man sitting on a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511276.jpg", "caption": "a group of people standing on a snow covered slope", "annotations": [{"polygon": [[206, 347], [209, 375], [281, 370], [277, 343]], "text": "FLOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FOW", "recog_valid": false, "glyph_recog_text": "FLOW", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511279.jpg", "caption": "a woman pulling luggage on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511301.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249176.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[61, 66], [108, 67], [104, 137], [56, 136]], "text": "J.", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "一", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 1.0}, {"polygon": [[118, 66], [173, 69], [170, 137], [115, 136]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Ai", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[179, 69], [504, 83], [501, 175], [177, 155]], "text": "P.Morgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Morgan", "recog_valid": false, "glyph_recog_text": "P.Morgan", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511346.jpg", "caption": "a group of people standing around a skateboarder", "annotations": [{"polygon": [[346, 126], [430, 118], [431, 142], [347, 153]], "text": "SOUVIENS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SOUViENS", "recog_valid": false, "glyph_recog_text": "SOUVIENS", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[435, 144], [499, 137], [501, 158], [434, 167]], "text": "MARYSE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MARYSE", "recog_valid": true, "glyph_recog_text": "MARYSE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118131.jpg", "caption": "a display of teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380274.jpg", "caption": "a toilet sitting next to a rusty trash can", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118150.jpg", "caption": "two men are preparing food in a restaurant", "annotations": [{"polygon": [[25, 249], [31, 292], [116, 274], [110, 233]], "text": "880", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "880", "recog_valid": true, "glyph_recog_text": "880", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249226.jpg", "caption": "a group of people standing in an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511373.jpg", "caption": "a red and white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380301.jpg", "caption": "two girls on swings at a carnival", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118186.jpg", "caption": "a girl running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511402.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[191, 306], [334, 341], [330, 393], [196, 404], [190, 349]], "text": "Phillies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YHllo", "recog_valid": false, "glyph_recog_text": "Phiies", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118188.jpg", "caption": "a large jetliner flying over a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249264.jpg", "caption": "a white lamb stands in the grass near a body of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511412.jpg", "caption": "a train with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118207.jpg", "caption": "a woman on skis is going down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249288.jpg", "caption": "a group of motorcycles on a city street", "annotations": [{"polygon": [[421, 86], [431, 137], [494, 123], [486, 85]], "text": "FOREVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "", "recog_valid": false, "glyph_recog_text": "FOREVER", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[312, 85], [321, 132], [358, 119], [362, 146], [371, 144], [368, 85]], "text": "DISNEP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "!", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249290.jpg", "caption": "a cat sitting on a curb next to a green scooter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511438.jpg", "caption": "a group of people standing around an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118234.jpg", "caption": "a refrigerator with a light shining on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511455.jpg", "caption": "two women playing tennis in different poses", "annotations": [{"polygon": [[23, 183], [31, 155], [54, 176], [61, 208], [57, 218]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249336.jpg", "caption": "a man in a suit and tie with a medal around his neck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249362.jpg", "caption": "a baseball player is holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118302.jpg", "caption": "a computer monitor, keyboard, and mouse on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380447.jpg", "caption": "a man on a bike with a red box on his back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249382.jpg", "caption": "three snowboarders in the air", "annotations": [{"polygon": [[210, 276], [230, 260], [244, 269], [272, 302], [255, 317]], "text": "RIDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "心", "recog_valid": false, "glyph_recog_text": "RIDE", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[344, 226], [362, 231], [395, 293], [378, 291]], "text": "ALLIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALLIAN", "recog_valid": true, "glyph_recog_text": "ALLIAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249384.jpg", "caption": "a man wearing a purple shirt that says citizen volunteer", "annotations": [{"polygon": [[87, 261], [95, 221], [276, 236], [274, 273]], "text": "CITIZEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CITIZEN", "recog_valid": true, "glyph_recog_text": "CITIZEN", "glyph_recog_ld": 1.0}, {"polygon": [[86, 266], [274, 279], [274, 306], [86, 290]], "text": "VOLUNTEER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VOLUNTEER", "recog_valid": true, "glyph_recog_text": "VOLUNTEER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380457.jpg", "caption": "a brick wall", "annotations": [{"polygon": [[293, 267], [175, 268], [174, 233], [293, 234]], "text": "CECIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CECIL", "recog_valid": true, "glyph_recog_text": "CECIL", "glyph_recog_ld": 1.0}, {"polygon": [[328, 235], [397, 233], [398, 267], [328, 265]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RD", "recog_valid": false, "glyph_recog_text": "R", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511537.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511553.jpg", "caption": "a car is parked at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511571.jpg", "caption": "a man on a skateboard doing a trick on a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511580.jpg", "caption": "processed liquid marmalade recipe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511587.jpg", "caption": "a steam train on the tracks near a small village", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249451.jpg", "caption": "a baseball player throwing a ball to a batter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118404.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380586.jpg", "caption": "a man is smiling while holding a donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380588.jpg", "caption": "a black and white photo of a motorcycle parked in front of a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249519.jpg", "caption": "a baseball player is running to the base", "annotations": [{"polygon": [[119, 189], [124, 178], [130, 180], [137, 183], [144, 187], [149, 192], [153, 199], [156, 204], [146, 208], [141, 202], [135, 197], [127, 194]], "text": "GIANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GIANZ", "recog_valid": false, "glyph_recog_text": "GIANT", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511665.jpg", "caption": "three double decker buses parked in a lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118457.jpg", "caption": "three men sitting on the grass holding a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118459.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249555.jpg", "caption": "a fire hydrant is painted with a design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511706.jpg", "caption": "a tractor and a truck on a snowy road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511716.jpg", "caption": "a large clock tower in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380651.jpg", "caption": "a sign that says madams organ", "annotations": [{"polygon": [[97, 289], [298, 284], [288, 316], [97, 319]], "text": "RESTAURANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RCSTAURANT", "recog_valid": false, "glyph_recog_text": "RESTAURANT", "glyph_recog_ld": 0.9000000999999}, {"polygon": [[340, 281], [409, 282], [410, 315], [339, 315]], "text": "BAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAR", "recog_valid": true, "glyph_recog_text": "BAR", "glyph_recog_ld": 1.0}, {"polygon": [[168, 110], [344, 97], [372, 86], [403, 99], [415, 126], [383, 178], [119, 191], [117, 179], [132, 148], [159, 115]], "text": "MADAM'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "(14DA))S", "recog_valid": false, "glyph_recog_text": "MADAM'S", "glyph_recog_ld": 0.37500078124902336}, {"polygon": [[139, 198], [204, 196], [260, 189], [400, 185], [398, 272], [349, 275], [285, 278], [164, 281], [127, 282], [112, 271], [105, 244], [115, 214], [127, 203]], "text": "ORGAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORCAII", "recog_valid": false, "glyph_recog_text": "ORGAN", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[156, 323], [186, 325], [248, 325], [256, 328], [249, 345], [240, 354], [175, 355], [172, 358], [143, 361]], "text": "Heart", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Heart", "recog_valid": true, "glyph_recog_text": "Heart", "glyph_recog_ld": 1.0}, {"polygon": [[125, 359], [107, 392], [130, 392], [147, 377], [149, 361]], "text": "of", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "10", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[160, 360], [147, 391], [264, 390], [267, 386], [260, 374], [214, 360], [191, 357]], "text": "Adams", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Adamb", "recog_valid": false, "glyph_recog_text": "Adams", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[280, 358], [270, 391], [394, 392], [399, 385], [397, 372], [313, 358]], "text": "morgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Moroan", "recog_valid": false, "glyph_recog_text": "morgan", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[331, 320], [317, 344], [321, 350], [329, 354], [395, 353], [405, 321], [403, 320]], "text": "Soul", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Soul", "recog_valid": true, "glyph_recog_text": "Soul", "glyph_recog_ld": 1.0}, {"polygon": [[264, 331], [254, 349], [258, 355], [306, 354], [307, 350], [317, 325]], "text": "and", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "and", "recog_valid": true, "glyph_recog_text": "and", "glyph_recog_ld": 1.0}, {"polygon": [[306, 279], [296, 309], [298, 318], [315, 322], [336, 316], [335, 302], [318, 279]], "text": "&", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "&", "recog_valid": false, "glyph_recog_text": ":", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118535.jpg", "caption": "a baseball game with a batter and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511752.jpg", "caption": "a blue motor scooter parked on the sidewalk", "annotations": [{"polygon": [[375, 155], [378, 171], [391, 178], [424, 179], [429, 163], [419, 150], [406, 147], [393, 142]], "text": "Fye", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AIO", "recog_valid": false, "glyph_recog_text": "Fye", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249619.jpg", "caption": "a couple walking by a carnival with stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511786.jpg", "caption": "a baseball game in progress with a pitcher throwing the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380732.jpg", "caption": "two women sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249688.jpg", "caption": "a traffic sign on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511866.jpg", "caption": "two women sitting on a bench with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380797.jpg", "caption": "a woman standing on the back of a red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380827.jpg", "caption": "three men are standing in a room with a large box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118730.jpg", "caption": "a stop sign with a woman running in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249810.jpg", "caption": "a living room with a television, a chair, a table, a couch, a toy box and a star", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249813.jpg", "caption": "a city street with two buses and a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380889.jpg", "caption": "a man standing in front of a screen with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249817.jpg", "caption": "a woman playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249822.jpg", "caption": "a little girl standing in front of an open refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380898.jpg", "caption": "a street sign that says madrid", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118771.jpg", "caption": "a man on skis is going down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511988.jpg", "caption": "a group of police motorcycles riding down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249891.jpg", "caption": "a bike and luggage on a train", "annotations": [{"polygon": [[281, 250], [274, 256], [305, 285], [309, 282]], "text": "FUJI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FUO", "recog_valid": false, "glyph_recog_text": "posl", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118827.jpg", "caption": "a soccer player is running after the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249905.jpg", "caption": "three men posing for a picture on skis", "annotations": [{"polygon": [[384, 272], [415, 269], [423, 307], [390, 308]], "text": "51", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "品", "recog_valid": false, "glyph_recog_text": "LO", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249906.jpg", "caption": "a yellow and blue train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380981.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118840.jpg", "caption": "a man playing a video game in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512068.jpg", "caption": "a cat sitting under a sign", "annotations": [{"polygon": [[173, 158], [211, 160], [217, 125], [174, 124]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[232, 160], [305, 161], [306, 127], [232, 126]], "text": "BALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BALL", "recog_valid": true, "glyph_recog_text": "BALL", "glyph_recog_ld": 1.0}, {"polygon": [[185, 207], [186, 172], [289, 174], [289, 209]], "text": "GAMES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GAMES", "recog_valid": true, "glyph_recog_text": "GAMES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249958.jpg", "caption": "a large airplane in a museum with people looking at it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249968.jpg", "caption": "a tv and a computer on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118909.jpg", "caption": "two stuffed animals sit on a shelf with books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118916.jpg", "caption": "a man standing in front of a building talking on a cell phone", "annotations": [{"polygon": [[397, 80], [369, 434], [456, 434], [506, 79]], "text": "LWELRY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "【-0-10>", "recog_valid": false, "glyph_recog_text": "J3 W i ", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249988.jpg", "caption": "a street sign with many directions on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118920.jpg", "caption": "a group of people standing around a large airplane", "annotations": [{"polygon": [[245, 174], [254, 185], [259, 193], [264, 207], [265, 214], [266, 232], [265, 250], [291, 255], [309, 255], [324, 254], [351, 255], [350, 213], [332, 210], [320, 187], [267, 176]], "text": "FedEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Teax", "recog_valid": false, "glyph_recog_text": "FedEx", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512143.jpg", "caption": "a large clock tower with a large clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512152.jpg", "caption": "a double decker bus", "annotations": [{"polygon": [[273, 163], [273, 163], [307, 161], [321, 157], [332, 159], [330, 184], [300, 188], [273, 187]], "text": "W J C", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WJC", "recog_valid": false, "glyph_recog_text": "WJC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118964.jpg", "caption": "a baseball player signing autographs for fans", "annotations": [{"polygon": [[215, 271], [210, 316], [236, 304], [256, 296], [282, 290], [296, 289], [315, 288], [323, 286], [325, 278], [325, 260], [305, 258], [273, 262], [244, 266]], "text": "ANGELS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "XNGELS", "recog_valid": false, "glyph_recog_text": "ANGELS", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118968.jpg", "caption": "an old photo of a train track with buildings in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512186.jpg", "caption": "two people on surfboards in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381116.jpg", "caption": "two baseball players are running to first base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381119.jpg", "caption": "a group of young boys standing on a skateboard ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118976.jpg", "caption": "a toilet and a paper towel dispenser in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381128.jpg", "caption": "a man sitting on a bench next to a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381139.jpg", "caption": "a surfer riding a wave in front of a cliff", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381163.jpg", "caption": "a red bus driving down a street with a tree in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512258.jpg", "caption": "a double decker bus on a city street", "annotations": [{"polygon": [[91, 266], [104, 266], [112, 274], [104, 294], [99, 297], [90, 295], [78, 295]], "text": "llg", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "兰", "recog_valid": false, "glyph_recog_text": "611", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381199.jpg", "caption": "a lunch box with fruit, crackers, and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512275.jpg", "caption": "a banana tree with a bunch of bananas hanging from it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381217.jpg", "caption": "a man standing in a kitchen with two pizzas on the counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381244.jpg", "caption": "a woman cutting a cake with a knife", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381256.jpg", "caption": "a black and white photo of a room with several refrigerators", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250195.jpg", "caption": "a man walking on a wet sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119124.jpg", "caption": "a motorcycle with a backpack on it", "annotations": [{"polygon": [[116, 241], [154, 260], [161, 241], [122, 225]], "text": "Dalat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dalat", "recog_valid": true, "glyph_recog_text": "Datat", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[154, 259], [164, 243], [241, 275], [227, 298], [178, 278]], "text": "Easyrider", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Easyrider", "recog_valid": true, "glyph_recog_text": "Easyride", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512351.jpg", "caption": "a kitchen with a refrigerator, sink and window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119141.jpg", "caption": "a bus is driving down the road", "annotations": [{"polygon": [[318, 142], [320, 119], [380, 141], [380, 161]], "text": "Andrews", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Andrews", "recog_valid": true, "glyph_recog_text": "Andrews", "glyph_recog_ld": 1.0}, {"polygon": [[390, 146], [390, 164], [432, 176], [431, 162], [418, 154]], "text": "Leuchars", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Leucnars", "recog_valid": false, "glyph_recog_text": "Leucuars", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381301.jpg", "caption": "a blue bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250239.jpg", "caption": "a man in a chef's hat standing in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119169.jpg", "caption": "a man is holding a frisbee in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381314.jpg", "caption": "two men standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250250.jpg", "caption": "a baseball player is holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512405.jpg", "caption": "a man holding a pair of scissors in his hand", "annotations": [{"polygon": [[456, 130], [371, 374], [428, 387], [495, 140]], "text": "NCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "GA", "recog_valid": false, "glyph_recog_text": "NC(", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381333.jpg", "caption": "a group of people standing around a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119195.jpg", "caption": "a display case with sandwiches and pastries", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250268.jpg", "caption": "a black and white photo of a parking meter", "annotations": [{"polygon": [[133, 305], [129, 373], [202, 369], [201, 306]], "text": "6328", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6508", "recog_valid": false, "glyph_recog_text": "6328", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119199.jpg", "caption": "a stop sign with a bird on it", "annotations": [{"polygon": [[217, 228], [213, 274], [330, 294], [330, 249]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250289.jpg", "caption": "a road sign with a black and white stripe", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381377.jpg", "caption": "a group of people playing soccer on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512459.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119244.jpg", "caption": "a group of people skiing down a slope", "annotations": [{"polygon": [[145, 231], [199, 252], [196, 266], [137, 244]], "text": "USASA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "USASA", "recog_valid": true, "glyph_recog_text": "USASA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381390.jpg", "caption": "a street sign and a traffic light on a pole", "annotations": [{"polygon": [[271, 365], [271, 365], [272, 379], [342, 332], [339, 317]], "text": "BELMONT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BELMONT", "recog_valid": true, "glyph_recog_text": "BELMONT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381400.jpg", "caption": "a man on a snowboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250360.jpg", "caption": "a female tennis player in blue and white is about to hit the ball", "annotations": [{"polygon": [[77, 305], [72, 313], [92, 324], [120, 346], [127, 339], [109, 325]], "text": "BLX", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ELX", "recog_valid": false, "glyph_recog_text": "BLN", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512506.jpg", "caption": "a man standing on a tennis court with a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119299.jpg", "caption": "a man standing in front of a train with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119304.jpg", "caption": "a group of people standing around a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250381.jpg", "caption": "a man standing on a beach holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381470.jpg", "caption": "a person leaning over a sink in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381475.jpg", "caption": "a man holding a surfboard", "annotations": [{"polygon": [[248, 208], [230, 224], [279, 280], [294, 260]], "text": "EMBA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EMBA", "recog_valid": true, "glyph_recog_text": "EMBA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512557.jpg", "caption": "a boat that is in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381492.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250424.jpg", "caption": "a dog sitting on a table with a birthday cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119357.jpg", "caption": "a view of a city street from inside a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381509.jpg", "caption": "a man on a skateboard doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250451.jpg", "caption": "a person riding skis down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250474.jpg", "caption": "a man playing a video game on a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119449.jpg", "caption": "a yellow school bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250543.jpg", "caption": "a man sitting on a chair holding a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119492.jpg", "caption": "a man and woman cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250569.jpg", "caption": "a television screen with a news anchor and a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250597.jpg", "caption": "a woman brushing her teeth in front of a tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250607.jpg", "caption": "a toilet in a small room with pipes and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119540.jpg", "caption": "a black and white photo of a speed boat on the water", "annotations": [{"polygon": [[12, 360], [9, 411], [38, 412], [45, 401], [42, 361]], "text": "S", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "09", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512797.jpg", "caption": "a train station with a clock and a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512806.jpg", "caption": "a woman in a leather outfit posing on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119594.jpg", "caption": "several boats are docked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250677.jpg", "caption": "a bus driving down a street with cars parked on both sides", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250684.jpg", "caption": "a pizza on a wooden board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381759.jpg", "caption": "a truck with eddie stobart on it", "annotations": [{"polygon": [[39, 256], [41, 197], [212, 221], [208, 268]], "text": "Eddie", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Eddie", "recog_valid": true, "glyph_recog_text": "Eddie", "glyph_recog_ld": 1.0}, {"polygon": [[218, 221], [216, 267], [362, 278], [364, 243]], "text": "Stobart", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Stobart", "recog_valid": true, "glyph_recog_text": "Stobart", "glyph_recog_ld": 1.0}, {"polygon": [[2, 129], [0, 153], [53, 166], [56, 145]], "text": "RUP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "RUP", "recog_valid": true, "glyph_recog_text": "RUP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119618.jpg", "caption": "a classic car with an umbrella on top", "annotations": [{"polygon": [[126, 350], [252, 360], [252, 335], [132, 326]], "text": "JMT 563 W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JMT 563W", "recog_valid": false, "glyph_recog_text": "JMT 563 W", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250698.jpg", "caption": "a bowl of green dip with vegetables and carrots", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512845.jpg", "caption": "a display of neck ties and a box of jewelry", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250708.jpg", "caption": "highland cow in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381810.jpg", "caption": "a large white ship with a large banner on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381811.jpg", "caption": "a parking meter on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119678.jpg", "caption": "an old postcard shows a group of people on horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381828.jpg", "caption": "a woman walking down the street on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381856.jpg", "caption": "a woman sitting in front of a birthday cake", "annotations": [{"polygon": [[216, 477], [218, 498], [320, 507], [320, 480]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Birthday", "recog_valid": true, "glyph_recog_text": "Birthday", "glyph_recog_ld": 1.0}, {"polygon": [[222, 328], [222, 351], [228, 360], [243, 360], [277, 354], [283, 341], [287, 327], [281, 315]], "text": "oe", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oe", "recog_valid": true, "glyph_recog_text": "o e", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381865.jpg", "caption": "a stop sign with a tree in the background", "annotations": [{"polygon": [[249, 14], [259, 16], [282, 108], [334, 316], [340, 451], [233, 294]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512941.jpg", "caption": "two people on skis are walking down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250808.jpg", "caption": "a traffic light with a red light on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250809.jpg", "caption": "a truck driving down a street with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512961.jpg", "caption": "a model train set with a small town and a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119751.jpg", "caption": "a donut with bacon and egg on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512968.jpg", "caption": "a bed with a pile of books on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119766.jpg", "caption": "a person holding skis and a helmet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250844.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119787.jpg", "caption": "a man walking a dog and talking on a cell phone", "annotations": [{"polygon": [[3, 178], [18, 175], [56, 182], [96, 185], [99, 210], [86, 211], [1, 213]], "text": "AKKERS & ZN.", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AKKERS & ZN", "recog_valid": false, "glyph_recog_text": "AKERS & ZH.", "glyph_recog_ld": 0.7272729752063862}, {"polygon": [[306, 231], [307, 260], [312, 266], [345, 239], [324, 209]], "text": "H", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "H", "recog_valid": true, "glyph_recog_text": "H", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381931.jpg", "caption": "a man doing a trick on a skateboard", "annotations": [{"polygon": [[146, 15], [142, 34], [228, 65], [227, 45]], "text": "ANTARCTICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ANTARCTICA", "recog_valid": true, "glyph_recog_text": "ANTARCTICA", "glyph_recog_ld": 1.0}, {"polygon": [[256, 115], [240, 142], [365, 196], [381, 164]], "text": "NOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ISTION NOW", "recog_valid": false, "glyph_recog_text": "NOW", "glyph_recog_ld": 0.3000006999993}, {"polygon": [[342, 13], [258, 64], [248, 74], [248, 105], [385, 158], [446, 44]], "text": "AN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "4N", "recog_valid": false, "glyph_recog_text": "AN", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119798.jpg", "caption": "an old truck parked on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513015.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381945.jpg", "caption": "a pizza on a baking sheet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250880.jpg", "caption": "a fighter jet parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119808.jpg", "caption": "a man walking down the street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119815.jpg", "caption": "two women walking down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119822.jpg", "caption": "a large airplane parked on the tarmac with people around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250909.jpg", "caption": "a table with a bunch of fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513053.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381985.jpg", "caption": "a vase of red flowers on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119849.jpg", "caption": "a laptop computer sitting on a desk with a coffee cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513075.jpg", "caption": "a sign has texts", "annotations": [{"polygon": [[37, 170], [37, 170], [261, 168], [263, 221], [38, 220]], "text": "HEGGIES", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HEGGIES", "recog_valid": true, "glyph_recog_text": "HEGGIES", "glyph_recog_ld": 1.0}, {"polygon": [[293, 168], [467, 169], [467, 218], [305, 219]], "text": "WYND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WYND", "recog_valid": true, "glyph_recog_text": "WYND", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382005.jpg", "caption": "a white car parked next to a yellow fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513080.jpg", "caption": "a man in a pink shirt is riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119868.jpg", "caption": "a person skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119882.jpg", "caption": "a yellow ice cream truck parked on a street", "annotations": [{"polygon": [[253, 176], [253, 189], [301, 173], [301, 159]], "text": "DIEDEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "DHEDEL", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250966.jpg", "caption": "two giraffes standing next to each other in an enclosure", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513111.jpg", "caption": "a sign that says five different plates on a bus", "annotations": [{"polygon": [[300, 164], [320, 185], [338, 213], [352, 269], [357, 276], [377, 274], [378, 262], [372, 232], [362, 205], [351, 186], [336, 164], [315, 139]], "text": "PLATES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PLATES", "recog_valid": true, "glyph_recog_text": "PLATES", "glyph_recog_ld": 1.0}, {"polygon": [[141, 152], [163, 177], [181, 158], [189, 152], [205, 143], [223, 140], [240, 139], [261, 144], [286, 155], [300, 130], [257, 110], [239, 107], [201, 110], [174, 122]], "text": "DIFFRENT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OIFFRENT", "recog_valid": false, "glyph_recog_text": "DIFFRENT", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[129, 171], [123, 187], [118, 200], [114, 217], [112, 257], [142, 260], [142, 232], [145, 216], [155, 187]], "text": "FIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3A1J", "recog_valid": false, "glyph_recog_text": "3AIH", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[225, 305], [221, 412], [239, 431], [261, 431], [271, 419], [275, 400], [274, 365], [295, 367], [297, 356], [293, 340], [262, 303], [248, 302]], "text": "6.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "co", "recog_valid": false, "glyph_recog_text": "o.", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513123.jpg", "caption": "a group of people skiing down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119909.jpg", "caption": "a small bedroom with a bed, desk, and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382083.jpg", "caption": "a man walking down the street", "annotations": [{"polygon": [[111, 208], [109, 266], [152, 267], [160, 239], [160, 211]], "text": "SF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SF", "recog_valid": true, "glyph_recog_text": "S", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251016.jpg", "caption": "a bag with a passport, passport holder, passport, wallet, passport holder, passport, passport holder, passport holder, passport holder, passport holder, passport holder", "annotations": [{"polygon": [[434, 235], [397, 195], [390, 202], [428, 241]], "text": "LITTLEJOY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LITTLEIC", "recog_valid": false, "glyph_recog_text": "SiEErOe", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251032.jpg", "caption": "many boats are docked in the water near a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382107.jpg", "caption": "a man is hanging upside down on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119994.jpg", "caption": "a baseball player swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382154.jpg", "caption": "a street sign with a number on it", "annotations": [{"polygon": [[211, 205], [210, 257], [260, 241], [273, 237], [273, 226], [273, 209], [270, 182]], "text": "265", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "265", "recog_valid": true, "glyph_recog_text": "265", "glyph_recog_ld": 1.0}, {"polygon": [[222, 301], [220, 369], [276, 376], [276, 316]], "text": "144", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "一寸", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382182.jpg", "caption": "a flock of pigeons flying over a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513282.jpg", "caption": "a boat is sailing on the river thames", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251168.jpg", "caption": "a group of young baseball players are celebrating", "annotations": [{"polygon": [[358, 143], [384, 145], [406, 162], [401, 181], [368, 172], [335, 170]], "text": "RAPI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "RAPI", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[143, 352], [126, 393], [136, 410], [148, 422], [169, 428], [176, 426], [190, 371]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "40", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251183.jpg", "caption": "a stop sign is on a street with houses", "annotations": [{"polygon": [[307, 215], [309, 242], [364, 245], [367, 229], [364, 217]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513359.jpg", "caption": "a man snowboarding down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120155.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[283, 139], [335, 132], [336, 155], [288, 165]], "text": "BLAIST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BLAIS", "recog_valid": false, "glyph_recog_text": "BLAIST", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[299, 167], [326, 160], [337, 195], [325, 205], [311, 206]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": false, "glyph_recog_text": "11", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382320.jpg", "caption": "a bus on a wet street", "annotations": [{"polygon": [[390, 401], [474, 380], [478, 400], [390, 420]], "text": "yakobus", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "yakobus", "recog_valid": true, "glyph_recog_text": "yakobus", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251256.jpg", "caption": "air canada boeing 767-300", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382361.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[224, 136], [220, 153], [255, 168], [258, 150]], "text": "TCB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1日", "recog_valid": false, "glyph_recog_text": "TCB", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513441.jpg", "caption": "a plane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251312.jpg", "caption": "a black and white photo of a street sign", "annotations": [{"polygon": [[194, 23], [187, 51], [229, 53], [226, 44], [205, 27]], "text": "blue", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "bivve", "recog_valid": false, "glyph_recog_text": "biue", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513456.jpg", "caption": "a baseball player in blue and white uniform", "annotations": [{"polygon": [[184, 240], [190, 268], [274, 256], [267, 227]], "text": "ERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ERS", "recog_valid": true, "glyph_recog_text": "ERS", "glyph_recog_ld": 1.0}, {"polygon": [[225, 270], [235, 321], [290, 312], [288, 260]], "text": "29", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "29", "recog_valid": true, "glyph_recog_text": "29", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251318.jpg", "caption": "a traffic light on a busy street with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251335.jpg", "caption": "a sign that says no swimming", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513482.jpg", "caption": "two people walking in the rain with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120276.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513531.jpg", "caption": "a red train is parked at a train station", "annotations": [{"polygon": [[474, 264], [474, 264], [475, 287], [438, 297], [435, 276]], "text": "REGIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "REGIO", "recog_valid": true, "glyph_recog_text": "REGIO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513541.jpg", "caption": "a small blue and white airplane parked on the tarmac", "annotations": [{"polygon": [[205, 267], [259, 252], [258, 259], [258, 276], [261, 296], [206, 304], [205, 291]], "text": "215", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "215", "recog_valid": true, "glyph_recog_text": "215", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120329.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[115, 240], [117, 200], [159, 203], [157, 240]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "15", "glyph_recog_ld": 1.0}, {"polygon": [[192, 350], [187, 271], [278, 278], [280, 359]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513573.jpg", "caption": "a fence behind the baseball bats", "annotations": [{"polygon": [[210, 81], [244, 97], [222, 135], [211, 129]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "7", "recog_valid": true, "glyph_recog_text": "卜", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120369.jpg", "caption": "a bunch of bananas and oranges sitting on a table", "annotations": [{"polygon": [[324, 164], [331, 146], [374, 164], [368, 183]], "text": "I'M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IPM", "recog_valid": false, "glyph_recog_text": "I'M", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[318, 177], [323, 164], [368, 184], [363, 196]], "text": "ECO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ECO", "recog_valid": true, "glyph_recog_text": "ECO", "glyph_recog_ld": 1.0}, {"polygon": [[311, 196], [354, 214], [349, 228], [306, 209]], "text": "Chiquita", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Chiquita", "recog_valid": true, "glyph_recog_text": "Chiquita", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382517.jpg", "caption": "a room with a lot of old suitcases and a chair", "annotations": [{"polygon": [[53, 476], [71, 444], [83, 476], [71, 496], [60, 496]], "text": "ROOK", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "K", "recog_valid": false, "glyph_recog_text": "ROOK", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[78, 377], [38, 432], [49, 449], [84, 394]], "text": "Campsons", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Omusons", "recog_valid": false, "glyph_recog_text": "Camgpsons", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120400.jpg", "caption": "a man sitting at a table with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120416.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[283, 177], [293, 206], [324, 202], [319, 174]], "text": "46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "46", "recog_valid": true, "glyph_recog_text": "46", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513657.jpg", "caption": "a man on a motorcycle rides past a bus", "annotations": [{"polygon": [[142, 188], [401, 179], [405, 209], [141, 223]], "text": "SHIZUWAKANKO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHIZUWAKANKO", "recog_valid": true, "glyph_recog_text": "SHIZUWAKANKO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251523.jpg", "caption": "a person sitting at a desk eating a donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513676.jpg", "caption": "a young boy sitting at a table with a donut and a bottle of water", "annotations": [{"polygon": [[357, 94], [436, 111], [436, 111], [426, 135], [349, 115], [349, 115]], "text": "Dressing", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Dressing", "recog_valid": true, "glyph_recog_text": "Dressing", "glyph_recog_ld": 1.0}, {"polygon": [[279, 456], [280, 472], [292, 468], [294, 471], [298, 471], [298, 467], [313, 461], [324, 452], [329, 442], [322, 432], [313, 442], [305, 448], [307, 454], [287, 461], [284, 454]], "text": "AquaBall", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Aaal", "recog_valid": false, "glyph_recog_text": "AquaBal", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120462.jpg", "caption": "a truck with graffiti on it parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382631.jpg", "caption": "a street at night with a bridge in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120491.jpg", "caption": "a bottle of orange juice next to an orange", "annotations": [{"polygon": [[296, 215], [321, 229], [340, 233], [359, 226], [382, 218], [379, 254], [360, 261], [343, 264], [316, 264], [298, 258], [285, 242]], "text": "orange", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "branye", "recog_valid": false, "glyph_recog_text": "orange", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[392, 207], [423, 190], [418, 221], [393, 229]], "text": "biH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "bih", "recog_valid": false, "glyph_recog_text": "biH", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382659.jpg", "caption": "a river with buildings and a bridge in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251589.jpg", "caption": "a military truck with guns on top", "annotations": [{"polygon": [[383, 220], [382, 253], [414, 251], [417, 216]], "text": "BARON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BAROM", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251591.jpg", "caption": "a red fire hydrant next to a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382664.jpg", "caption": "a pitcher throwing a baseball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513743.jpg", "caption": "a cat sitting on a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382695.jpg", "caption": "a baseball player swinging at a ball during a game", "annotations": [{"polygon": [[342, 183], [340, 199], [345, 209], [350, 209], [360, 201], [393, 189], [396, 177], [381, 174], [361, 181], [354, 188]], "text": "Oriolez", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Oueg", "recog_valid": false, "glyph_recog_text": "Oriolez", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513796.jpg", "caption": "a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120585.jpg", "caption": "a television set on a shelf", "annotations": [{"polygon": [[272, 464], [300, 466], [302, 502], [273, 503]], "text": "NORA", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "NORA", "recog_valid": true, "glyph_recog_text": "zoo", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382731.jpg", "caption": "two baseball players are high fiving each other", "annotations": [{"polygon": [[150, 147], [185, 151], [190, 159], [187, 188], [146, 186], [140, 178], [141, 155]], "text": "32", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "32", "recog_valid": true, "glyph_recog_text": "32", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120595.jpg", "caption": "a sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382745.jpg", "caption": "a pizza with meat, olives and peppers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513832.jpg", "caption": "a person doing a flip on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251696.jpg", "caption": "a man and a woman on a subway train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251697.jpg", "caption": "two pictures of horses pulling a carriage", "annotations": [{"polygon": [[337, 275], [344, 283], [353, 275], [364, 268], [372, 264], [382, 259], [394, 257], [405, 256], [404, 244], [392, 245], [380, 247], [370, 251], [357, 259], [348, 264]], "text": "DOUBLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOUBLE", "recog_valid": true, "glyph_recog_text": "DOUBLE", "glyph_recog_ld": 1.0}, {"polygon": [[429, 257], [443, 263], [452, 266], [461, 271], [465, 275], [470, 278], [473, 283], [475, 285], [488, 278], [481, 271], [468, 260], [454, 253], [443, 249], [430, 245]], "text": "FARMS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FARMS", "recog_valid": true, "glyph_recog_text": "FARMS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251700.jpg", "caption": "a red refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251701.jpg", "caption": "a statue of a man on a horse in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120642.jpg", "caption": "a clock tower in a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120645.jpg", "caption": "two buses are driving down a road near a canyon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513863.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120674.jpg", "caption": "two men jumping to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120676.jpg", "caption": "a woman sitting on a bench taking a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120718.jpg", "caption": "a man standing in front of a red bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513943.jpg", "caption": "a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513961.jpg", "caption": "a black and white photo of people sitting on benches", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251818.jpg", "caption": "a man is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513966.jpg", "caption": "a car driving down the street in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251835.jpg", "caption": "bill bryson's shakespeare", "annotations": [{"polygon": [[185, 208], [132, 353], [71, 336], [82, 311], [146, 196]], "text": "Bryson", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "uosAIg", "recog_valid": false, "glyph_recog_text": "uos/ug", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[208, 231], [170, 338], [146, 330], [186, 225]], "text": "Shakespeare", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Shakespeare", "recog_valid": true, "glyph_recog_text": "Shakespeare", "glyph_recog_ld": 1.0}, {"polygon": [[210, 379], [206, 392], [264, 406], [301, 412], [304, 403], [243, 386]], "text": "Shakespeare", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Shakespeare", "recog_valid": true, "glyph_recog_text": "Shakespeare", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513980.jpg", "caption": "two people standing in front of a sign that says usualia", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513983.jpg", "caption": "a cake with a carousel horse on it", "annotations": [{"polygon": [[308, 168], [327, 154], [370, 217], [348, 234]], "text": "pamie", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "feornia", "recog_valid": false, "glyph_recog_text": "pamie", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[400, 149], [430, 132], [444, 155], [414, 171]], "text": "we", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "J。", "recog_valid": false, "glyph_recog_text": "we", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[440, 127], [489, 109], [504, 137], [452, 153]], "text": "love", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Aoue", "recog_valid": false, "glyph_recog_text": "love", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[447, 169], [491, 149], [512, 177], [467, 198]], "text": "you", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "gou", "recog_valid": false, "glyph_recog_text": "you", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[196, 354], [194, 389], [292, 395], [292, 363]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Happy", "recog_valid": true, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.0}, {"polygon": [[223, 380], [218, 419], [343, 423], [345, 380]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Bootndauy", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251842.jpg", "caption": "a bicycle parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514002.jpg", "caption": "a cat with a cone on its head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382929.jpg", "caption": "a train car with graffiti on it", "annotations": [{"polygon": [[143, 269], [328, 261], [316, 318], [132, 323]], "text": "DEWOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "D2A0D", "recog_valid": false, "glyph_recog_text": "DEWOS", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251860.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120790.jpg", "caption": "a bus parked near a fountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514006.jpg", "caption": "a pink bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382945.jpg", "caption": "a man is lighting up a birthday cake", "annotations": [{"polygon": [[114, 354], [136, 403], [152, 406], [157, 405], [141, 364], [127, 355]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sancg.", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[85, 347], [148, 457], [134, 448], [133, 454], [142, 462], [169, 472], [154, 441], [162, 439], [143, 406], [129, 390], [122, 371], [103, 353], [87, 348]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Eithiey", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120805.jpg", "caption": "the toilet is open and the door is open", "annotations": [{"polygon": [[158, 341], [186, 339], [205, 336], [209, 334], [222, 330], [220, 348], [211, 351], [196, 356], [178, 359], [158, 361]], "text": "PARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARK", "recog_valid": true, "glyph_recog_text": "PARK", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382953.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382958.jpg", "caption": "a man standing on a tree branch in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382969.jpg", "caption": "a person wearing blue converse sneakers on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120836.jpg", "caption": "a sandwich with cucumber and cream cheese on a plate", "annotations": [{"polygon": [[143, 216], [148, 226], [209, 206], [204, 196]], "text": "EDITION PATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FDITION PATE", "recog_valid": false, "glyph_recog_text": "ADTENNPNTE", "glyph_recog_ld": 0.5000004166663194}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514055.jpg", "caption": "a sign that says stop", "annotations": [{"polygon": [[114, 366], [108, 413], [251, 432], [254, 391]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251922.jpg", "caption": "a blue bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120926.jpg", "caption": "a family poses for a picture with elephants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252010.jpg", "caption": "two skateboarders in the air doing tricks in an indoor parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383085.jpg", "caption": "a busy street with many people walking and riding motorcycles", "annotations": [{"polygon": [[342, 168], [340, 194], [373, 194], [367, 164]], "text": "CK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ck", "recog_valid": false, "glyph_recog_text": "CK", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120952.jpg", "caption": "a bus driving down a street with people on it", "annotations": [{"polygon": [[173, 307], [172, 335], [205, 337], [207, 308]], "text": "Loop", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Loop", "recog_valid": true, "glyph_recog_text": "Loop", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252038.jpg", "caption": "three people posing for a picture with wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514186.jpg", "caption": "a young boy riding a skateboard in a pool", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383131.jpg", "caption": "a blue bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514207.jpg", "caption": "a rusty old train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514209.jpg", "caption": "two plates of food on a table with chopsticks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383157.jpg", "caption": "a pot filled with broccoli on the stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383158.jpg", "caption": "a bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252092.jpg", "caption": "two baseball players in action on the field", "annotations": [{"polygon": [[9, 271], [10, 328], [135, 326], [132, 268]], "text": "Best", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Best", "recog_valid": true, "glyph_recog_text": "Best", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514243.jpg", "caption": "a woman sitting on a bench", "annotations": [{"polygon": [[319, 189], [276, 194], [276, 223], [322, 220]], "text": "WK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "WK", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121032.jpg", "caption": "a model sailboat floating on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514251.jpg", "caption": "a kitchen with a center island and bar stools", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252122.jpg", "caption": "a street sign on a building", "annotations": [{"polygon": [[372, 269], [399, 232], [394, 199], [369, 240], [366, 257], [370, 271]], "text": "GIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GIE", "recog_valid": false, "glyph_recog_text": "GIVE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121052.jpg", "caption": "a woman holding a child under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252132.jpg", "caption": "a large airplane parked at an airport terminal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383212.jpg", "caption": "a man riding a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383213.jpg", "caption": "the new town, london, england", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514318.jpg", "caption": "a group of motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383250.jpg", "caption": "a desk with two computers and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121107.jpg", "caption": "a sign that says the old apple farm is not associated with the new apple farm", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252193.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252194.jpg", "caption": "a street light with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252215.jpg", "caption": "a man and a woman are posing for a picture", "annotations": [{"polygon": [[357, 331], [346, 354], [305, 332], [286, 313], [295, 305]], "text": "ZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6774", "recog_valid": false, "glyph_recog_text": "ZZA", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[338, 476], [379, 467], [353, 514], [311, 513]], "text": "Pi", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "P i", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252217.jpg", "caption": "a display of apples and pears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514369.jpg", "caption": "a red light is on a train track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514387.jpg", "caption": "a cow is standing behind a fence with a tag on its ear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121174.jpg", "caption": "a boy on a skateboard", "annotations": [{"polygon": [[212, 162], [223, 163], [241, 165], [253, 168], [264, 174], [270, 180], [273, 186], [264, 202], [257, 195], [248, 189], [234, 182], [222, 180], [210, 183]], "text": "VINNETK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VNNETK", "recog_valid": false, "glyph_recog_text": "VINNETK", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[176, 126], [182, 118], [191, 110], [200, 106], [205, 104], [207, 111], [199, 117], [195, 123], [191, 128], [187, 131], [184, 135], [181, 141]], "text": "WINN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Nu", "recog_valid": false, "glyph_recog_text": "WINN", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121181.jpg", "caption": "a group of men playing a game of frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383329.jpg", "caption": "a man holding a stop sign", "annotations": [{"polygon": [[64, 171], [206, 156], [212, 199], [68, 221]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514404.jpg", "caption": "a child sitting at a table with a pair of scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383348.jpg", "caption": "a basket of apples with a sign that says wine wrap", "annotations": [{"polygon": [[310, 347], [328, 318], [338, 310], [362, 270], [367, 259], [388, 220], [405, 188], [377, 159], [330, 245], [323, 246], [289, 290], [249, 361]], "text": "SAP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WINE", "recog_valid": false, "glyph_recog_text": "SAP", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[353, 336], [396, 265], [398, 241], [389, 234], [337, 329]], "text": "SAP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAP", "recog_valid": true, "glyph_recog_text": "SAP", "glyph_recog_ld": 1.0}, {"polygon": [[382, 355], [436, 295], [409, 269], [365, 327], [356, 340], [360, 349]], "text": "994", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "99", "recog_valid": false, "glyph_recog_text": "994", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252277.jpg", "caption": "a book on a table next to a laptop", "annotations": [{"polygon": [[250, 220], [250, 220], [249, 262], [249, 262], [428, 262], [428, 262], [429, 200]], "text": "EUROPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EUROPE", "recog_valid": true, "glyph_recog_text": "EUROPE", "glyph_recog_ld": 1.0}, {"polygon": [[485, 184], [452, 230], [462, 235], [496, 189]], "text": "BUDCET", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BUDCET", "recog_valid": true, "glyph_recog_text": "BUDCET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514454.jpg", "caption": "a fire truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383391.jpg", "caption": "a man and woman standing in a room with their hands up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121249.jpg", "caption": "a knife on a table", "annotations": [{"polygon": [[44, 290], [101, 343], [144, 340], [187, 317], [192, 283], [188, 267], [166, 240], [123, 216], [85, 217], [54, 224], [34, 241], [34, 270]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[275, 252], [268, 277], [280, 297], [306, 315], [357, 306], [373, 289], [371, 263], [352, 249], [358, 229], [339, 213], [310, 206], [283, 210], [270, 220], [267, 242]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383401.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252340.jpg", "caption": "a stop sign and a green and white sign", "annotations": [{"polygon": [[75, 292], [76, 256], [156, 258], [156, 292]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[293, 160], [294, 195], [369, 194], [368, 162]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383424.jpg", "caption": "a large jet airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514513.jpg", "caption": "a bulldog laying on the floor with a water bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252375.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514519.jpg", "caption": "an old train car with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383452.jpg", "caption": "a man holding a snowboard on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383453.jpg", "caption": "three people standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514528.jpg", "caption": "a young boy holding a baseball bat on a baseball field", "annotations": [{"polygon": [[350, 248], [348, 304], [468, 298], [466, 243]], "text": "DIXO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DIXO", "recog_valid": true, "glyph_recog_text": "DIXO", "glyph_recog_ld": 1.0}, {"polygon": [[359, 312], [357, 367], [472, 361], [473, 307]], "text": "FIELI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FIELI", "recog_valid": true, "glyph_recog_text": "FIELI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514531.jpg", "caption": "a stop sign with a no stopping sign attached to it", "annotations": [{"polygon": [[160, 116], [158, 191], [365, 188], [364, 104]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514558.jpg", "caption": "a baseball player is about to throw the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514563.jpg", "caption": "a group of people riding bikes on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121372.jpg", "caption": "a conveyor belt with a line of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252448.jpg", "caption": "people walking in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252457.jpg", "caption": "a teddy bear sitting on a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514601.jpg", "caption": "a train station with benches and a sign that says a sweet way to go", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383564.jpg", "caption": "a woman walking by a bike parked outside a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252504.jpg", "caption": "a penguin wearing a bow tie and holding a bottle of alcohol", "annotations": [{"polygon": [[309, 39], [307, 80], [317, 96], [332, 95], [341, 81], [348, 58], [353, 36], [343, 24], [330, 25]], "text": "O", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "O", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383576.jpg", "caption": "a living room with a couch, a table, and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514662.jpg", "caption": "a red and white train on the tracks next to wind turbines", "annotations": [{"polygon": [[298, 288], [317, 284], [348, 274], [350, 271], [344, 258], [297, 269]], "text": "TR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZRi", "recog_valid": false, "glyph_recog_text": "T R", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121448.jpg", "caption": "a police car parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383605.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[248, 128], [292, 124], [306, 148], [259, 155]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}, {"polygon": [[266, 338], [237, 396], [243, 404], [272, 343]], "text": "EASTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOLSV3", "recog_valid": false, "glyph_recog_text": "专6370粉", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252567.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383640.jpg", "caption": "a cat sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121535.jpg", "caption": "a wooden sign pointing to a bus stop", "annotations": [{"polygon": [[229, 397], [229, 397], [229, 397], [244, 419], [342, 340], [330, 325]], "text": "csgphotos", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "cegphotos", "recog_valid": false, "glyph_recog_text": "csgphotos", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[457, 249], [464, 263], [511, 237], [511, 209]], "text": "csgp", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "csgp", "recog_valid": true, "glyph_recog_text": "csg", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[342, 324], [342, 324], [355, 342], [457, 268], [448, 255]], "text": "csgphotos", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "csgphotos", "recog_valid": true, "glyph_recog_text": "csgphotos", "glyph_recog_ld": 1.0}, {"polygon": [[230, 209], [235, 227], [279, 202], [270, 192]], "text": "SHAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SI!RA", "recog_valid": false, "glyph_recog_text": "SHAR", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514771.jpg", "caption": "two airplanes parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514777.jpg", "caption": "a group of people playing frisbee", "annotations": [{"polygon": [[305, 282], [353, 288], [348, 324], [302, 319]], "text": "24", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "24", "recog_valid": true, "glyph_recog_text": "24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514798.jpg", "caption": "a black steam engine train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514800.jpg", "caption": "a purse, cell phone, keys, and other items laid out on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252664.jpg", "caption": "a train is pulling into a station with a gas pump", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121598.jpg", "caption": "a teddy bear sits in the back seat of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121619.jpg", "caption": "a man is standing next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252693.jpg", "caption": "a gate with a stop sign and a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514839.jpg", "caption": "a man on a motorcycle is sitting on the back of the bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252702.jpg", "caption": "a group of young men playing trombones in front of a stage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514890.jpg", "caption": "a red post on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252751.jpg", "caption": "a group of people sitting at a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121709.jpg", "caption": "a snowboarder is doing a trick on a ramp", "annotations": [{"polygon": [[163, 354], [173, 350], [200, 347], [213, 353], [211, 365], [195, 369], [168, 378]], "text": "FANTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fanta", "recog_valid": false, "glyph_recog_text": "FANTA", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[184, 240], [227, 230], [228, 254], [187, 265]], "text": "Fan", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "n)", "recog_valid": false, "glyph_recog_text": "Fan", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514930.jpg", "caption": "a man and woman playing a video game at a convention", "annotations": [{"polygon": [[206, 197], [259, 196], [261, 219], [242, 229], [205, 203]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "一A", "recog_valid": false, "glyph_recog_text": "D", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383866.jpg", "caption": "a silver pick up truck with a tire on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252799.jpg", "caption": "a woman in a bathtub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514945.jpg", "caption": "a man wearing a hat and a blue shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383873.jpg", "caption": "a lunch box with pasta, carrots, and a biscuit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383893.jpg", "caption": "a living room with a christmas tree and a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121749.jpg", "caption": "a man in a blue shirt and tie standing in front of a flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121754.jpg", "caption": "a young man standing in a train station with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000514997.jpg", "caption": "a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121785.jpg", "caption": "the new york city memorial to the victims of 9/11", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383930.jpg", "caption": "a man in a suit riding a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515019.jpg", "caption": "a refrigerator with a bottle of beer on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515020.jpg", "caption": "a group of people standing around a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000383956.jpg", "caption": "an old photo of a train station with a bridge over it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515040.jpg", "caption": "a white van with a bird on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515053.jpg", "caption": "new zealand's new prime minister, john key, speaks to the media in new zealand's capital city, Wellington, on june 7, 2011", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515075.jpg", "caption": "a soccer player is kicking the ball on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252937.jpg", "caption": "a man and a dog on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384021.jpg", "caption": "a brick building with a tv and a sign", "annotations": [{"polygon": [[117, 248], [124, 281], [119, 282], [116, 326], [122, 326], [125, 339], [143, 354], [146, 338], [136, 316], [131, 315], [131, 282], [136, 282], [132, 243]], "text": "ZAKURO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NxS", "recog_valid": false, "glyph_recog_text": "N4¥30", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121879.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252952.jpg", "caption": "people sitting at tables under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252956.jpg", "caption": "a stop sign is shown next to a wooden deck", "annotations": [{"polygon": [[287, 269], [292, 312], [416, 282], [409, 238]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515100.jpg", "caption": "a bedroom with a desk, computer and bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384030.jpg", "caption": "a large building with many windows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384034.jpg", "caption": "a man playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515136.jpg", "caption": "a fluffy orange cat sitting on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000252998.jpg", "caption": "three people sitting on a couch with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253004.jpg", "caption": "a book on a laptop computer", "annotations": [{"polygon": [[164, 224], [187, 234], [191, 230], [190, 229], [211, 210], [217, 212], [218, 210], [206, 205], [198, 198]], "text": "LIBRARY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IIERRARY", "recog_valid": false, "glyph_recog_text": "LIBRARY", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[126, 220], [153, 232], [204, 194], [178, 182]], "text": "lMAGINATION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AKGNOIN", "recog_valid": false, "glyph_recog_text": "IMAGINATION", "glyph_recog_ld": 0.3636369421482344}, {"polygon": [[2, 209], [2, 209], [55, 171], [38, 166], [2, 185]], "text": "PER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IPER", "recog_valid": false, "glyph_recog_text": "PER", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121943.jpg", "caption": "two women playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121952.jpg", "caption": "a woman sitting at a table with a gun and a bunch of cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000121954.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253049.jpg", "caption": "a box of doughnuts", "annotations": [{"polygon": [[45, 45], [52, 33], [218, 52], [218, 73], [48, 56]], "text": "GREENBUSH", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "GREENBUSH", "recog_valid": true, "glyph_recog_text": "GREENBUSH", "glyph_recog_ld": 1.0}, {"polygon": [[42, 164], [204, 180], [195, 192], [195, 202], [39, 189]], "text": "BAKERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAKERY", "recog_valid": true, "glyph_recog_text": "BAKERY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253054.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515212.jpg", "caption": "a living room with a dining room table and chairs", "annotations": [{"polygon": [[55, 224], [54, 236], [63, 235], [70, 234], [80, 230], [89, 224], [93, 216], [99, 211], [103, 202], [104, 199], [99, 193], [94, 200], [90, 207], [84, 213], [81, 217], [77, 221], [71, 223], [68, 225], [61, 226]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SRTRO", "recog_valid": false, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384164.jpg", "caption": "a white truck with a trailer full of atv's", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384173.jpg", "caption": "a sandwich and orange juice on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384176.jpg", "caption": "three men sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515249.jpg", "caption": "a man eating a chocolate donut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515252.jpg", "caption": "a woman walking a horse down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253109.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[182, 187], [322, 207], [322, 261], [181, 247]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[249, 270], [294, 271], [283, 313], [260, 304], [248, 283]], "text": "SA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "SA", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253122.jpg", "caption": "a person riding a motorcycle on a desert plain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253141.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515304.jpg", "caption": "a p-51 d Mustang, c-47, and f-86f fighter aircraft,", "annotations": [{"polygon": [[455, 238], [509, 225], [512, 268], [468, 280]], "text": "F .", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "F", "recog_valid": false, "glyph_recog_text": "F.", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122093.jpg", "caption": "two men shaking hands with a baseball player", "annotations": [{"polygon": [[130, 304], [138, 327], [168, 311], [168, 295], [147, 291]], "text": "FIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tr", "recog_valid": false, "glyph_recog_text": "FIT", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515309.jpg", "caption": "a person riding a motorcycle on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122097.jpg", "caption": "a sandwich and a side of cole slaw on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515315.jpg", "caption": "a group of people standing around a jeep on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253183.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122116.jpg", "caption": "a box of scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122118.jpg", "caption": "a large clock tower with people standing around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384261.jpg", "caption": "a teddy bear wearing a t-shirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384260.jpg", "caption": "a glass of wine sitting on a table near a pool", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515354.jpg", "caption": "a vintage postcard showing two people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253217.jpg", "caption": "a parking meter with two different prices on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253221.jpg", "caption": "a pink elephant sign with the words car wash on it", "annotations": [{"polygon": [[224, 145], [338, 163], [339, 218], [220, 204]], "text": "CAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAR", "recog_valid": true, "glyph_recog_text": "CAR", "glyph_recog_ld": 1.0}, {"polygon": [[168, 200], [347, 225], [345, 279], [175, 262]], "text": "WASH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WASH", "recog_valid": true, "glyph_recog_text": "WASH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122157.jpg", "caption": "fast snowboarding com", "annotations": [{"polygon": [[244, 85], [246, 140], [486, 145], [486, 82]], "text": "FAST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "FAST", "recog_valid": true, "glyph_recog_text": "FAST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384302.jpg", "caption": "vintage pink refrigerator with a pink door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122164.jpg", "caption": "a young man holding up a sign in front of a street sign", "annotations": [{"polygon": [[146, 70], [146, 99], [266, 90], [266, 64]], "text": "Homer", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Homer", "recog_valid": true, "glyph_recog_text": "Homer", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253238.jpg", "caption": "a woman holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384326.jpg", "caption": "a beach with umbrellas and a child on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122189.jpg", "caption": "a little girl is flying a kite in the park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253261.jpg", "caption": "a street sign with a sign that says hong science museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253265.jpg", "caption": "a laptop on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122194.jpg", "caption": "a shopping cart is sitting on the grass next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122207.jpg", "caption": "a green and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515428.jpg", "caption": "carrots and other vegetables are on display at a farmers market", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253285.jpg", "caption": "an orange and white airplane taking off from the runway", "annotations": [{"polygon": [[137, 176], [137, 176], [165, 213], [178, 234], [168, 244], [152, 218], [143, 210], [143, 204], [130, 181], [132, 177]], "text": "easyJet", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "easyJet", "recog_valid": true, "glyph_recog_text": "easyJel", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[202, 253], [191, 265], [194, 267], [226, 270], [237, 269], [255, 270], [271, 266], [316, 259], [382, 249], [384, 224], [319, 230], [287, 234]], "text": "easyJet.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "easyetcom", "recog_valid": false, "glyph_recog_text": "easyJet.com", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122235.jpg", "caption": "a laptop computer with a picture of a train on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253308.jpg", "caption": "an owl is sitting on a branch in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515457.jpg", "caption": "cars are driving down the street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384389.jpg", "caption": "a man wearing a helmet and a red shirt is riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384395.jpg", "caption": "a train traveling down the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253334.jpg", "caption": "a person doing a trick on skis", "annotations": [{"polygon": [[188, 196], [169, 199], [123, 126], [137, 124]], "text": "NORDICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "F2N", "recog_valid": false, "glyph_recog_text": "NORDICA", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[256, 237], [273, 233], [334, 306], [320, 311]], "text": "NORDICA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NMORDKCA", "recog_valid": false, "glyph_recog_text": "NORDICA", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384412.jpg", "caption": "an old black and white photo of a sign in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515502.jpg", "caption": "a steam locomotive is on the tracks in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515527.jpg", "caption": "a pair of scissors and a magazine on a table", "annotations": [{"polygon": [[222, 124], [261, 97], [266, 106], [229, 133]], "text": "PARTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PARTY", "recog_valid": true, "glyph_recog_text": "PARTY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384463.jpg", "caption": "a hot dog and fries on a paper plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384475.jpg", "caption": "a black and white photo of a train in a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515550.jpg", "caption": "a man speaking at a rally with a sign in front of him", "annotations": [{"polygon": [[280, 151], [278, 189], [363, 182], [369, 142]], "text": "hama", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "euama", "recog_valid": false, "glyph_recog_text": "hama", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[413, 153], [412, 173], [451, 165], [449, 143]], "text": "YOU", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "YOU", "recog_valid": true, "glyph_recog_text": "YOU", "glyph_recog_ld": 1.0}, {"polygon": [[454, 141], [456, 164], [496, 153], [496, 130]], "text": "CAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CAN", "recog_valid": true, "glyph_recog_text": "CAN", "glyph_recog_ld": 1.0}, {"polygon": [[407, 195], [405, 207], [409, 220], [515, 206], [512, 177]], "text": "PI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "PlIT", "recog_valid": false, "glyph_recog_text": "P t", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[299, 205], [301, 231], [373, 223], [369, 199]], "text": "STONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TDXE", "recog_valid": false, "glyph_recog_text": "STONE", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122343.jpg", "caption": "a couple of trucks parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253416.jpg", "caption": "a street with cars parked in front of buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515561.jpg", "caption": "a pair of skis and a pair of snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384499.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515581.jpg", "caption": "a girl in a scout uniform smiles while holding a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122403.jpg", "caption": "a school bus parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515623.jpg", "caption": "a motorcycle is on display in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253479.jpg", "caption": "a man throwing a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253483.jpg", "caption": "a white wall", "annotations": [{"polygon": [[154, 300], [154, 300], [152, 324], [152, 324], [169, 319], [210, 304], [211, 281]], "text": "Speed Stick ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SpEEd", "recog_valid": false, "glyph_recog_text": "Speed Stick", "glyph_recog_ld": 0.2727279338836964}, {"polygon": [[157, 322], [207, 306], [202, 328], [155, 343]], "text": "stick", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "stick", "recog_valid": true, "glyph_recog_text": "stick", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384559.jpg", "caption": "a man wearing a red sweater vest and tie", "annotations": [{"polygon": [[340, 19], [340, 47], [376, 47], [383, 54], [396, 47], [435, 46], [434, 29], [400, 24], [379, 19]], "text": "Fontana", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "lonjana", "recog_valid": false, "glyph_recog_text": "Fontana", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[330, 415], [326, 452], [417, 421], [422, 389]], "text": "ORIGINAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "uaiaae", "recog_valid": false, "glyph_recog_text": "ORIGINAL", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[345, 456], [351, 471], [405, 456], [400, 435]], "text": "Fontana", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "lonjana", "recog_valid": false, "glyph_recog_text": "Fontana", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122419.jpg", "caption": "dell inspiron 15 7000 2-in-1", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253500.jpg", "caption": "a plate with several different types of desserts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515655.jpg", "caption": "a train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515662.jpg", "caption": "a tray of hot dogs and fries", "annotations": [{"polygon": [[358, 235], [351, 242], [356, 249], [368, 258], [390, 267], [395, 258], [384, 255], [371, 246], [363, 238]], "text": "Nathan's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PuDuo\"", "recog_valid": false, "glyph_recog_text": "Nathan's", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[-1, 346], [23, 371], [61, 363], [85, 342], [97, 318], [100, 294], [88, 256], [87, 267], [90, 290], [78, 320], [59, 339], [48, 345], [14, 344]], "text": "han", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "hee", "recog_valid": false, "glyph_recog_text": "han", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384595.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[302, 198], [317, 186], [327, 182], [341, 175], [350, 172], [356, 171], [366, 171], [369, 187], [356, 188], [345, 192], [332, 199], [322, 205], [315, 211]], "text": "HEYWARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEYWARO", "recog_valid": false, "glyph_recog_text": "HEYWARD", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[314, 222], [337, 213], [358, 204], [376, 197], [382, 219], [388, 248], [363, 255], [330, 270]], "text": "22", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "22", "recog_valid": true, "glyph_recog_text": "22", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384598.jpg", "caption": "a red post box with a bridge in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384603.jpg", "caption": "a plane is parked on the tarmac with luggage on the ground", "annotations": [{"polygon": [[52, 337], [49, 360], [133, 373], [140, 355], [69, 345], [64, 338]], "text": "BLACKLEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BLACKLEY", "recog_valid": true, "glyph_recog_text": "BLACKLEY", "glyph_recog_ld": 1.0}, {"polygon": [[369, 394], [367, 426], [466, 426], [467, 408]], "text": "CLARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "CLARK", "recog_valid": true, "glyph_recog_text": "CLARK", "glyph_recog_ld": 1.0}, {"polygon": [[216, 375], [217, 399], [350, 419], [350, 393]], "text": "SILVESTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SILVESTER", "recog_valid": true, "glyph_recog_text": "SILVESTER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253532.jpg", "caption": "a sign on a pole that says no attention required", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515684.jpg", "caption": "three yellow airplanes parked in an indoor hangar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384626.jpg", "caption": "a man riding a motorcycle on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515701.jpg", "caption": "man sitting on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515702.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[238, 263], [234, 286], [291, 296], [292, 276]], "text": "Phillies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Philni2", "recog_valid": false, "glyph_recog_text": "Phillles", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122498.jpg", "caption": "a kitchen with a stove, microwave and coffee maker", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122511.jpg", "caption": "a group of people on a boat with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515728.jpg", "caption": "a large clock tower with a clock on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122537.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384691.jpg", "caption": "three horses are running together in a dirt field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122583.jpg", "caption": "a double decker bus with a yellow and blue design", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122584.jpg", "caption": "a table full of vegetables and fruits", "annotations": [{"polygon": [[104, 16], [117, 10], [140, 37], [126, 45]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122586.jpg", "caption": "a man with skis on his back is standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384738.jpg", "caption": "a bunch of bananas for sale at a market", "annotations": [{"polygon": [[226, 109], [226, 146], [270, 148], [285, 113], [276, 104], [235, 105]], "text": "89", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "89", "recog_valid": true, "glyph_recog_text": "89", "glyph_recog_ld": 1.0}, {"polygon": [[247, 258], [252, 299], [265, 302], [266, 306], [216, 307], [219, 303], [218, 269], [237, 259]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "王", "recog_valid": false, "glyph_recog_text": "1", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253675.jpg", "caption": "a row of remote controls", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515853.jpg", "caption": "a double decker bus with a flag on the roof", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122667.jpg", "caption": "a group of young men posing for a photo with tennis rackets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384826.jpg", "caption": "a white bag with a banana, a bottle of water, and a sandwich", "annotations": [{"polygon": [[247, 279], [222, 291], [205, 305], [197, 307], [197, 289], [215, 278], [228, 267], [235, 263], [243, 257], [246, 257]], "text": "Mars", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oW", "recog_valid": false, "glyph_recog_text": "Mars", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[197, 340], [202, 355], [206, 366], [216, 392], [221, 390], [226, 384], [222, 370], [217, 355], [211, 340], [208, 331], [212, 327], [210, 319], [205, 311], [197, 310], [190, 315], [180, 320], [173, 328], [178, 345], [187, 346], [194, 341]], "text": "Snaps", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S超", "recog_valid": false, "glyph_recog_text": "Snaps", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[202, 388], [212, 381], [207, 363], [199, 347], [197, 340], [197, 336], [200, 331], [204, 334], [212, 327], [206, 311], [198, 310], [179, 320], [174, 328], [178, 345], [188, 346], [187, 347], [194, 367], [185, 373]], "text": "Sesame Snaps ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SH", "recog_valid": false, "glyph_recog_text": "Snap", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515916.jpg", "caption": "a group of people posing for a picture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515917.jpg", "caption": "a group of people in a kitchen with a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122704.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[164, 54], [166, 92], [258, 89], [255, 50]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122709.jpg", "caption": "a baseball player swinging at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253784.jpg", "caption": "a group of umbrellas and bags are lined up against a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000515928.jpg", "caption": "a boy swinging a bat at a baseball game", "annotations": [{"polygon": [[128, 237], [176, 235], [174, 200], [136, 201], [126, 222]], "text": "48", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "48", "recog_valid": true, "glyph_recog_text": "48", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384874.jpg", "caption": "a woman in a green shirt and purple shorts is playing tennis", "annotations": [{"polygon": [[86, 150], [162, 150], [163, 235], [85, 241]], "text": "EIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EO", "recog_valid": false, "glyph_recog_text": "E", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253828.jpg", "caption": "a traffic light with a bicycle painted on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253834.jpg", "caption": "a bowl of cereal and a glass of water on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384909.jpg", "caption": "a large jetliner sitting on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516012.jpg", "caption": "a white and pink bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384953.jpg", "caption": "a tray of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122811.jpg", "caption": "a train traveling down the tracks with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000384957.jpg", "caption": "a refrigerator with a sign on it in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516046.jpg", "caption": "a group of people in a kitchen preparing food", "annotations": [{"polygon": [[421, 378], [421, 408], [494, 409], [494, 417], [498, 417], [496, 379]], "text": "cookun", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Cooku", "recog_valid": false, "glyph_recog_text": "cookun", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516049.jpg", "caption": "a man and a woman are standing on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253937.jpg", "caption": "a skateboarder doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122867.jpg", "caption": "a group of people on motorcycles in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253970.jpg", "caption": "a desk with a laptop, a keyboard, a mouse, and an open book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000253971.jpg", "caption": "a yellow surf rescue sign on a beach", "annotations": [{"polygon": [[248, 332], [244, 359], [181, 346], [186, 321]], "text": "SURF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SURF", "recog_valid": true, "glyph_recog_text": "SURF", "glyph_recog_ld": 1.0}, {"polygon": [[258, 334], [255, 361], [356, 378], [359, 352]], "text": "RESCUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RESCUE", "recog_valid": true, "glyph_recog_text": "RESCUE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516124.jpg", "caption": "a clock on the side of a building with the words restaurant", "annotations": [{"polygon": [[5, 483], [29, 482], [44, 496], [80, 488], [374, 482], [397, 436], [40, 441], [24, 445], [11, 453], [14, 463], [19, 460]], "text": "Resturant", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Reclanrant", "recog_valid": false, "glyph_recog_text": "Resturant", "glyph_recog_ld": 0.6000003999996}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122908.jpg", "caption": "a jet plane is flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122916.jpg", "caption": "a young boy holding a tennis racket on a tennis court", "annotations": [{"polygon": [[78, 278], [92, 312], [60, 325], [44, 292]], "text": "m", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "3", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[458, 118], [512, 125], [512, 206], [457, 202]], "text": "NE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "兰", "recog_valid": false, "glyph_recog_text": "Zu", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122923.jpg", "caption": "a tug boat sits in the water near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385070.jpg", "caption": "two men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254007.jpg", "caption": "a box with a tv in it sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122937.jpg", "caption": "a newborn boy is sleeping in a baseball glove and baseball bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516167.jpg", "caption": "a red truck parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516189.jpg", "caption": "a green vehicle with luggage and a backpack sitting on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000122973.jpg", "caption": "a red trolley bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516193.jpg", "caption": "a group of people standing around a table with pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385145.jpg", "caption": "a row of bicycles parked on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385146.jpg", "caption": "a young boy is playing with a frisbee in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254081.jpg", "caption": "a little girl in a gold dress standing in front of a subway station", "annotations": [{"polygon": [[142, 43], [211, 38], [211, 38], [216, 72], [216, 72], [144, 78]], "text": "Exit", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Exit", "recog_valid": true, "glyph_recog_text": "Exit", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516233.jpg", "caption": "a book cover with a suitcase and a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123028.jpg", "caption": "a small toy bear holding a coffee cup and a box of cereal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385185.jpg", "caption": "a store display with various tools and tools", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254124.jpg", "caption": "a blue sign with an arrow pointing up in front of some leaves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254139.jpg", "caption": "a cat wearing a red tie and sitting on the floor", "annotations": [{"polygon": [[67, 79], [64, 83], [64, 116], [132, 117], [135, 109], [137, 89], [134, 78], [120, 75], [90, 76], [73, 74]], "text": "2ND", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "2ND", "recog_valid": true, "glyph_recog_text": "2ND", "glyph_recog_ld": 1.0}, {"polygon": [[140, 76], [139, 117], [239, 116], [239, 116], [238, 75]], "text": "PRISE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PRISE", "recog_valid": true, "glyph_recog_text": "PRISE", "glyph_recog_ld": 1.0}, {"polygon": [[242, 76], [240, 80], [241, 117], [275, 117], [275, 76]], "text": "IZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IZ", "recog_valid": true, "glyph_recog_text": "1", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[278, 78], [278, 111], [281, 115], [289, 118], [381, 117], [380, 75], [286, 75]], "text": "STAKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STAKE", "recog_valid": true, "glyph_recog_text": "STAKE", "glyph_recog_ld": 1.0}, {"polygon": [[384, 78], [385, 117], [458, 117], [466, 112], [466, 83], [459, 75], [385, 76]], "text": "NIVS", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "NIVS", "recog_valid": true, "glyph_recog_text": "NIVS", "glyph_recog_ld": 1.0}, {"polygon": [[348, 397], [348, 436], [425, 437], [424, 396]], "text": "FIRD", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "FIRD", "recog_valid": true, "glyph_recog_text": "FIRD", "glyph_recog_ld": 1.0}, {"polygon": [[295, 396], [297, 435], [345, 436], [341, 397]], "text": "UR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "UR", "recog_valid": true, "glyph_recog_text": "UR", "glyph_recog_ld": 1.0}, {"polygon": [[260, 397], [260, 436], [291, 436], [293, 396]], "text": "IZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[157, 396], [156, 436], [256, 435], [255, 397]], "text": "PRISE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PRISE", "recog_valid": true, "glyph_recog_text": "PRISE", "glyph_recog_ld": 1.0}, {"polygon": [[82, 397], [80, 437], [152, 437], [155, 399]], "text": "3RD", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "3RD", "recog_valid": true, "glyph_recog_text": "3RD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254150.jpg", "caption": "a yellow sign has texts", "annotations": [{"polygon": [[136, 140], [136, 186], [226, 193], [226, 156], [231, 156], [231, 148], [136, 140]], "text": "DO NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DO NOT", "recog_valid": true, "glyph_recog_text": "DO NOT", "glyph_recog_ld": 1.0}, {"polygon": [[142, 201], [225, 203], [222, 217], [227, 244], [228, 246], [144, 247]], "text": "BLOCK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BLOCK", "recog_valid": true, "glyph_recog_text": "BLOCK", "glyph_recog_ld": 1.0}, {"polygon": [[118, 261], [247, 259], [245, 279], [244, 300], [121, 309]], "text": "DRIVEWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DRIVEWAY", "recog_valid": true, "glyph_recog_text": "DRIVEWAY", "glyph_recog_ld": 1.0}, {"polygon": [[319, 233], [364, 231], [398, 232], [404, 242], [405, 251], [403, 263], [398, 269], [391, 269], [392, 298], [320, 304], [313, 298], [311, 287], [309, 285], [310, 249], [313, 235]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254176.jpg", "caption": "a group of people dressed as bears riding on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385265.jpg", "caption": "a street sign that says lucky cave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254198.jpg", "caption": "a vintage truck with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123127.jpg", "caption": "a train is parked at a station with a sign that says ports de jant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254204.jpg", "caption": "sheep grazing on a hillside near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385283.jpg", "caption": "a man brushing his daughter's teeth in the bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254212.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385302.jpg", "caption": "a woman in a green shirt is feeding a zebra", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385305.jpg", "caption": "a yellow bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254242.jpg", "caption": "a f-16 fighter jet sits on the grass in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254244.jpg", "caption": "a young man in a white uniform kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123172.jpg", "caption": "a young boy kicking a soccer ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123184.jpg", "caption": "a man standing next to a motorcycle on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123190.jpg", "caption": "a snowboarder is in the air over a ramp", "annotations": [{"polygon": [[46, 407], [51, 449], [115, 407], [113, 387]], "text": "XGRAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "XGR410D", "recog_valid": false, "glyph_recog_text": "XGRAND", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385337.jpg", "caption": "two baseball players are talking on the field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516417.jpg", "caption": "a steam train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254281.jpg", "caption": "a pink phone next to a stuffed animal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516444.jpg", "caption": "a street with people walking and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385389.jpg", "caption": "a man and a woman are working on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254327.jpg", "caption": "a young boy holding a baseball bat on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123262.jpg", "caption": "an old postcard of a train station with smoke coming out of the engine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516490.jpg", "caption": "a street sign is shown in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385425.jpg", "caption": "a stop sign on a street corner at night", "annotations": [{"polygon": [[282, 216], [337, 216], [341, 247], [284, 244]], "text": "Yoga", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Yoga", "recog_valid": true, "glyph_recog_text": "Yoga", "glyph_recog_ld": 1.0}, {"polygon": [[249, 46], [242, 79], [306, 104], [313, 72]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[231, 33], [237, 13], [304, -4], [302, 16]], "text": "Maryland", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Marvlang", "recog_valid": false, "glyph_recog_text": "Maryland", "glyph_recog_ld": 0.7500003124996093}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123286.jpg", "caption": "two men in white and green playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254362.jpg", "caption": "four pictures of ducks swimming in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385441.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516516.jpg", "caption": "a snowboarder in mid air doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385448.jpg", "caption": "a man standing next to a cow on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123312.jpg", "caption": "a man and woman cutting a cake with a beard", "annotations": [{"polygon": [[202, 89], [193, 122], [232, 131], [240, 102]], "text": "30", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "30", "recog_valid": true, "glyph_recog_text": "30", "glyph_recog_ld": 1.0}, {"polygon": [[220, 76], [243, 68], [255, 73], [264, 92], [263, 97], [248, 102], [241, 94], [233, 89], [225, 92]], "text": "BIG", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "BIG", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516535.jpg", "caption": "a beach umbrella with a sign that says bisli", "annotations": [{"polygon": [[128, 159], [102, 183], [277, 172], [282, 151]], "text": "Bisleri", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bisleri", "recog_valid": true, "glyph_recog_text": "Bisleri", "glyph_recog_ld": 1.0}, {"polygon": [[299, 149], [322, 170], [354, 176], [392, 184], [435, 195], [408, 177]], "text": "Bisleri", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bsrex", "recog_valid": false, "glyph_recog_text": "B15fe1i", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[324, 240], [340, 260], [363, 247], [379, 245], [410, 243], [390, 217], [346, 226]], "text": "Bisleri", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "inslei8", "recog_valid": false, "glyph_recog_text": "Bisleri", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[144, 231], [137, 251], [178, 253], [217, 261], [224, 241], [185, 232]], "text": "Bisleri", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "noleia", "recog_valid": false, "glyph_recog_text": "Bisleri", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385464.jpg", "caption": "two people playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123330.jpg", "caption": "two pizzas on a stove top with a pan of sauce", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254410.jpg", "caption": "a man and a woman are looking at a laptop", "annotations": [{"polygon": [[288, 148], [296, 145], [348, 138], [352, 156], [309, 160], [296, 171], [286, 167]], "text": "good", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "good", "recog_valid": true, "glyph_recog_text": "good", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254411.jpg", "caption": "a yellow and blue fire hydrant in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385492.jpg", "caption": "a cubicle with two monitors and a keyboard on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385522.jpg", "caption": "a crowd of people standing around a horse mask", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516619.jpg", "caption": "an elephant painting on a canvas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385577.jpg", "caption": "a man pulling a cart with a white cow", "annotations": [{"polygon": [[403, 230], [415, 220], [448, 251], [430, 259]], "text": "67637", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "7287", "recog_valid": false, "glyph_recog_text": "67637", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385582.jpg", "caption": "new zealand's new prime minister john key", "annotations": [{"polygon": [[127, 427], [138, 419], [163, 452], [154, 464]], "text": "ZB", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ZB", "recog_valid": true, "glyph_recog_text": "28", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[138, 410], [170, 450], [173, 445], [172, 440], [144, 406]], "text": "ZB", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Newstalk", "recog_valid": false, "glyph_recog_text": ":品", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[158, 382], [149, 392], [175, 418], [181, 417], [187, 410]], "text": "RADIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "RADIO", "recog_valid": true, "glyph_recog_text": "RADIO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123444.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385588.jpg", "caption": "a man standing next to a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516673.jpg", "caption": "two dogs standing on a tiled floor near a plane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516685.jpg", "caption": "a man swinging a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385627.jpg", "caption": "a bowl of chopped up vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254567.jpg", "caption": "a sheep and a lamb standing in a field", "annotations": [{"polygon": [[152, 190], [210, 188], [222, 225], [159, 235]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385653.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385661.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123523.jpg", "caption": "a cow is sitting on top of a pile of snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385667.jpg", "caption": "a clock and a clock face on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385675.jpg", "caption": "a blue truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254604.jpg", "caption": "a backpack, a laptop, a mouse, a book, a cd, a mouse pad, a cd player, a cd, a mouse, a cd player", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123539.jpg", "caption": "a man in a chef's hat standing at a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254615.jpg", "caption": "a woman throwing a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385701.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516777.jpg", "caption": "two street signs in arabic", "annotations": [{"polygon": [[284, 231], [284, 231], [327, 200], [329, 200], [334, 218], [285, 248]], "text": "Road", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Road", "recog_valid": true, "glyph_recog_text": "Road", "glyph_recog_ld": 1.0}, {"polygon": [[216, 279], [263, 245], [274, 243], [276, 255], [216, 294]], "text": "Mussalla", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mussalla", "recog_valid": true, "glyph_recog_text": "Mumealia", "glyph_recog_ld": 0.6250004687494141}, {"polygon": [[123, 339], [181, 356], [185, 366], [183, 375], [124, 357], [119, 350]], "text": "Street", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Street", "recog_valid": true, "glyph_recog_text": "Street", "glyph_recog_ld": 1.0}, {"polygon": [[52, 317], [111, 336], [109, 354], [46, 332]], "text": "Deria", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Deira", "recog_valid": false, "glyph_recog_text": "Deria", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[267, 227], [269, 239], [272, 239], [283, 231], [290, 227], [309, 206], [328, 191], [327, 184], [318, 183], [297, 196]], "text": "Road", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "Road", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254632.jpg", "caption": "a boy riding a motorcycle in a carnival", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516782.jpg", "caption": "a tennis player on a court with a net", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385724.jpg", "caption": "a group of people playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516808.jpg", "caption": "a person cutting a piece of cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516812.jpg", "caption": "a red fire hydrant in a field near a tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385744.jpg", "caption": "two women sitting at a table eating breakfast", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385753.jpg", "caption": "a woman in a skirt and cowboy boots holding a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123612.jpg", "caption": "an old photo of a train and building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254714.jpg", "caption": "a polaroid of a fire hydrant and a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123646.jpg", "caption": "a white keyboard and mouse on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385804.jpg", "caption": "a jockey is racing a horse on a track", "annotations": [{"polygon": [[162, 244], [182, 205], [202, 216], [183, 254]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "寸", "recog_valid": false, "glyph_recog_text": "女", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123692.jpg", "caption": "a baseball player swinging at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000385837.jpg", "caption": "a baseball player kneeling down on a baseball field", "annotations": [{"polygon": [[232, 283], [227, 310], [259, 313], [266, 288]], "text": "15", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15", "recog_valid": true, "glyph_recog_text": "15", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254774.jpg", "caption": "a group of people doing a trick in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254778.jpg", "caption": "a train is pulling into a station", "annotations": [{"polygon": [[73, 36], [247, 15], [247, 45], [74, 63], [74, 63]], "text": "NEWARK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "NEWARK", "recog_valid": true, "glyph_recog_text": "NEWARK", "glyph_recog_ld": 1.0}, {"polygon": [[267, 10], [363, 1], [503, 0], [505, 17], [266, 40]], "text": "INTERNA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "INTE为", "recog_valid": false, "glyph_recog_text": "INTERNA", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516924.jpg", "caption": "a skateboarder doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123711.jpg", "caption": "a snowboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254787.jpg", "caption": "a cluttered desk with a computer, a fan, and a fan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123749.jpg", "caption": "a parking lot with a stop sign and a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516981.jpg", "caption": "two vultures on a fence post in the desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254845.jpg", "caption": "a small plane on the tarmac with a person walking by", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000516996.jpg", "caption": "two skiers jumping over a snow obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123788.jpg", "caption": "a british airways plane flying in the sky", "annotations": [{"polygon": [[286, 258], [335, 231], [338, 238], [292, 262]], "text": "TISH AIRWAYS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TISHAIRWAYS", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 0.0909099173546205}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254882.jpg", "caption": "a sink with a window sill with a bunch of oranges and lemons", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517026.jpg", "caption": "a man is playing tennis on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123831.jpg", "caption": "a stack of three suitcases with a tag on one of them", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254911.jpg", "caption": "a stop sign with a 4-way sign on top of it", "annotations": [{"polygon": [[151, 277], [146, 223], [297, 180], [308, 229]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[229, 332], [275, 316], [280, 337], [234, 351]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517086.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386016.jpg", "caption": "a train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386043.jpg", "caption": "a young man in a blue shirt is playing tennis", "annotations": [{"polygon": [[279, 237], [286, 255], [290, 251], [296, 246], [303, 240], [313, 233], [320, 230], [332, 228], [341, 229], [337, 213], [334, 210], [328, 210], [320, 211], [310, 213], [302, 217], [292, 224], [285, 229]], "text": "PANTHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PANTHER", "recog_valid": true, "glyph_recog_text": "PANTHER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123920.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000254997.jpg", "caption": "a red train engine on a track with grass and trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000123935.jpg", "caption": "a bus with a bicycle parked next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517176.jpg", "caption": "a red car is parked in front of a clock tower", "annotations": [{"polygon": [[167, 288], [167, 316], [264, 327], [265, 302]], "text": "ZARA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZARA", "recog_valid": true, "glyph_recog_text": "ZARA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517182.jpg", "caption": "a pair of scissors and a pair of pliers", "annotations": [{"polygon": [[91, 303], [76, 327], [6, 317], [0, 309], [1, 291], [39, 290]], "text": "Energizer", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "zibjau3", "recog_valid": false, "glyph_recog_text": "Energize", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386112.jpg", "caption": "three people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517226.jpg", "caption": "two red trains are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255089.jpg", "caption": "a small plate with slices of oranges and lemons", "annotations": [{"polygon": [[269, 450], [268, 492], [305, 491], [304, 449]], "text": "by", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "by", "recog_valid": true, "glyph_recog_text": "b", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517246.jpg", "caption": "a man standing in front of a table with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517251.jpg", "caption": "a man holding a tennis racket and a ball on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517261.jpg", "caption": "a horse and rider jumping over a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255123.jpg", "caption": "a man is helping a horse on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517305.jpg", "caption": "a red double decker bus with a wedding cake on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255164.jpg", "caption": "a woman riding a bike on a street next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124101.jpg", "caption": "a young boy holding a cup of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255179.jpg", "caption": "a cat laying on the floor next to books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517322.jpg", "caption": "a clock on a pole with a sign that says west history", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255182.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255203.jpg", "caption": "a plate with many cupcakes on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255214.jpg", "caption": "three blue buses parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124142.jpg", "caption": "a refrigerator and microwave sitting next to a wooden cabinet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517362.jpg", "caption": "a young boy flying a kite in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255239.jpg", "caption": "a man jumping over a wooden fence in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517395.jpg", "caption": "a purple train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386333.jpg", "caption": "two people on skis standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386341.jpg", "caption": "a yellow double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255271.jpg", "caption": "a traffic light on a street corner with a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386350.jpg", "caption": "a large white tank", "annotations": [{"polygon": [[330, 272], [317, 288], [307, 306], [320, 314], [353, 314], [355, 307], [341, 284]], "text": "Place", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Pece", "recog_valid": false, "glyph_recog_text": "Phuse", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[-1, 118], [104, 133], [90, 162], [-3, 146]], "text": "eriGas", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "eriGas", "recog_valid": true, "glyph_recog_text": "eriGas", "glyph_recog_ld": 1.0}, {"polygon": [[326, 279], [317, 313], [352, 317], [356, 285]], "text": "Place", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Goe", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[286, 248], [273, 279], [277, 286], [285, 291], [309, 289], [315, 286], [327, 276], [327, 262]], "text": "Joey's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "foy", "recog_valid": false, "glyph_recog_text": "Joey", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517438.jpg", "caption": "a half eaten donut sitting on a table", "annotations": [{"polygon": [[169, 86], [174, 79], [194, 83], [195, 88], [197, 104], [199, 120], [178, 120], [174, 117]], "text": "The", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "hc", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[161, 123], [163, 119], [178, 120], [188, 128], [194, 128], [192, 151], [188, 152], [161, 156]], "text": "Do", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Do", "recog_valid": true, "glyph_recog_text": "D", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517455.jpg", "caption": "a red train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386429.jpg", "caption": "a baseball player throwing a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517510.jpg", "caption": "a blue and yellow train is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255368.jpg", "caption": "a refrigerator with a lot of different types of drinks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517517.jpg", "caption": "a bus with a banana on the side", "annotations": [{"polygon": [[230, 323], [234, 349], [248, 345], [258, 344], [279, 339], [321, 330], [334, 320], [326, 308], [313, 308], [299, 309], [287, 316], [268, 319], [249, 325], [243, 319], [233, 317]], "text": "banana", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Parans", "recog_valid": false, "glyph_recog_text": "banana", "glyph_recog_ld": 0.5000008333319443}, {"polygon": [[322, 299], [329, 293], [335, 300], [350, 292], [362, 290], [375, 291], [384, 297], [381, 304], [371, 315], [360, 312], [352, 316], [347, 316], [334, 320]], "text": "bus", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VerS", "recog_valid": false, "glyph_recog_text": "bus", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517522.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255402.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517579.jpg", "caption": "three police officers on horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124364.jpg", "caption": "two boys playing video games", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517582.jpg", "caption": "a laptop computer with a mouse and keyboard on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517584.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[190, 374], [190, 374], [177, 421], [250, 424], [250, 374]], "text": "VANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "KT", "recog_valid": false, "glyph_recog_text": "VANS", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[442, 374], [442, 374], [429, 421], [506, 424], [507, 374]], "text": "VANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "LANS", "recog_valid": false, "glyph_recog_text": "VANS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517625.jpg", "caption": "a table with food and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386601.jpg", "caption": "two men standing next to each other with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124466.jpg", "caption": "a stop sign that is leaning on a tree", "annotations": [{"polygon": [[185, 141], [245, 125], [249, 153], [191, 166]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124525.jpg", "caption": "a man on a snowboard going down a ramp", "annotations": [{"polygon": [[171, 190], [175, 172], [214, 175], [241, 191], [261, 218], [271, 247], [269, 273], [246, 269], [244, 258], [244, 241], [238, 229], [231, 218], [218, 204], [200, 200], [181, 197]], "text": "ISCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "15C0", "recog_valid": false, "glyph_recog_text": "ISCO", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[225, 307], [243, 325], [233, 338], [211, 351], [177, 360], [175, 331], [194, 327], [209, 322], [216, 319]], "text": "TS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "176", "recog_valid": false, "glyph_recog_text": "T S", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[459, 417], [478, 403], [492, 395], [509, 381], [511, 409], [495, 424], [484, 427], [468, 428]], "text": "Sto", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Gto", "recog_valid": false, "glyph_recog_text": "Sto", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255604.jpg", "caption": "a man riding a bike in front of a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386684.jpg", "caption": "a young boy in a snow suit on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517755.jpg", "caption": "a man sitting at a table with a pizza and a salad", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517758.jpg", "caption": "a double decker bus on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517760.jpg", "caption": "a woman holding a doughnut and smiling", "annotations": [{"polygon": [[295, 402], [293, 408], [372, 436], [374, 431]], "text": "DONGHNUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ROUANNUTS", "recog_valid": false, "glyph_recog_text": "erxdrhyta", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[344, 398], [335, 413], [379, 428], [387, 411]], "text": "Kreme", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "hheme", "recog_valid": false, "glyph_recog_text": "Kreme", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255622.jpg", "caption": "a suitcase sitting on a stone floor next to a door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517772.jpg", "caption": "an old black and white photo of a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386724.jpg", "caption": "a man riding a skateboard in a large room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124593.jpg", "caption": "a person on skis standing on a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517830.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[262, 135], [262, 135], [266, 216], [133, 268], [135, 195], [255, 135]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386778.jpg", "caption": "a dog laying on a bed with a book", "annotations": [{"polygon": [[71, 263], [61, 282], [154, 307], [162, 288]], "text": "CHABON", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CHABON", "recog_valid": true, "glyph_recog_text": "CHABON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255713.jpg", "caption": "a person holding a black cell phone with a screen showing apps", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255714.jpg", "caption": "a yellow and blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124652.jpg", "caption": "a series of pictures of a skateboarder doing tricks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255728.jpg", "caption": "a small plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517875.jpg", "caption": "a man sitting on a bench in a park with a mailbox", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517881.jpg", "caption": "a police car is parked next to a large airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517910.jpg", "caption": "an old yellow trolley car is parked in a garage", "annotations": [{"polygon": [[178, 345], [178, 358], [234, 382], [234, 367]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386847.jpg", "caption": "a white and purple bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124715.jpg", "caption": "the dark knight joker figure is shown", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255791.jpg", "caption": "a zebra grazing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386872.jpg", "caption": "a person sitting at a table with a plate of pancakes and orange juice", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255800.jpg", "caption": "a man in a red shirt and black shorts is standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517958.jpg", "caption": "a red train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000517975.jpg", "caption": "a toy fire truck with a ladder and a ladder truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386919.jpg", "caption": "a large building with a clock on the top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124812.jpg", "caption": "a display of cakes and pastries on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000386984.jpg", "caption": "a bus on a city street with a traffic cone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124841.jpg", "caption": "a man walking on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518071.jpg", "caption": "a cell phone sitting on a towel next to a small red box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255941.jpg", "caption": "a man on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124889.jpg", "caption": "a toilet in a bathroom with a paper towel dispenser", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124899.jpg", "caption": "a cat sitting on top of a toilet in a bathroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387048.jpg", "caption": "a man running on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000255983.jpg", "caption": "a man and a woman in a wedding dress sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124914.jpg", "caption": "a halloween bento box with fruit, vegetables and a ghost", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124924.jpg", "caption": "a young boy wearing a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124935.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387102.jpg", "caption": "a red table cloth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256057.jpg", "caption": "a woman is running on a tennis court", "annotations": [{"polygon": [[304, 169], [313, 219], [313, 219], [391, 218], [471, 212], [462, 179], [423, 182], [420, 163], [382, 165], [386, 183], [335, 185], [331, 167]], "text": "FedEx", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FedEx", "recog_valid": true, "glyph_recog_text": "FedEx", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000124992.jpg", "caption": "a railroad crossing sign with a stop sign and a train", "annotations": [{"polygon": [[334, 188], [392, 120], [398, 131], [343, 198]], "text": "CROSSING`", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CROSSING", "recog_valid": false, "glyph_recog_text": "CROSSING'", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[328, 126], [337, 116], [359, 151], [351, 157]], "text": "RAIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAIL", "recog_valid": true, "glyph_recog_text": "RAIL", "glyph_recog_ld": 1.0}, {"polygon": [[346, 232], [346, 259], [389, 256], [388, 229]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256075.jpg", "caption": "a bus is stopped at a red light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125009.jpg", "caption": "a street sign with two signs on it", "annotations": [{"polygon": [[256, 139], [351, 135], [353, 190], [257, 194]], "text": "Tack", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tack", "recog_valid": true, "glyph_recog_text": "Tack", "glyph_recog_ld": 1.0}, {"polygon": [[385, 224], [387, 280], [202, 280], [203, 223]], "text": "Saddlebred", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Saddlebred", "recog_valid": true, "glyph_recog_text": "Saddlebred", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518235.jpg", "caption": "a cat sitting in a window", "annotations": [{"polygon": [[188, 246], [190, 327], [122, 320], [87, 301], [89, 281], [97, 248], [123, 232], [161, 216]], "text": "Asey", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Lof", "recog_valid": false, "glyph_recog_text": "s", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256096.jpg", "caption": "a female tennis player in action on the court", "annotations": [{"polygon": [[125, 130], [146, 95], [172, 115], [151, 149]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256098.jpg", "caption": "an old photo of a street with a horse drawn carriage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256105.jpg", "caption": "a group of people standing around a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125042.jpg", "caption": "a book on a bed with a blue and white striped blanket", "annotations": [{"polygon": [[265, 188], [278, 185], [325, 216], [312, 221]], "text": "Money", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Money", "recog_valid": true, "glyph_recog_text": "Monos", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[246, 193], [260, 189], [311, 226], [302, 232]], "text": "Finance", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Finance", "recog_valid": true, "glyph_recog_text": "Fin未指c台", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[286, 184], [323, 207], [342, 217], [364, 229], [387, 239], [397, 233], [372, 224], [356, 214], [316, 195], [304, 186], [297, 180]], "text": "International", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "International", "recog_valid": true, "glyph_recog_text": "feteroavicnai", "glyph_recog_ld": 0.5384618934908512}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518273.jpg", "caption": "a young girl reading a book with a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518288.jpg", "caption": "a young boy in a snow suit on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256155.jpg", "caption": "a man and his dog sitting outside a tattoo shop", "annotations": [{"polygon": [[129, 114], [146, 106], [211, 92], [212, 118], [162, 131], [130, 143]], "text": "TATTOO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "1A TOO", "recog_valid": false, "glyph_recog_text": "TATTOO", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[236, 92], [287, 79], [338, 70], [366, 53], [380, 48], [395, 47], [402, 52], [405, 84], [336, 94], [293, 101], [256, 109], [236, 114]], "text": "PIERCIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PIERCING", "recog_valid": false, "glyph_recog_text": "PIERCIN", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387256.jpg", "caption": "two women holding teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125117.jpg", "caption": "a red and blue fire hydrant on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256190.jpg", "caption": "a group of people sitting around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125128.jpg", "caption": "a man in a white shirt is putting something in an oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518355.jpg", "caption": "a sign on a tree", "annotations": [{"polygon": [[155, 132], [197, 143], [187, 163], [150, 153], [146, 143], [149, 135]], "text": "CET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CET", "recog_valid": true, "glyph_recog_text": "CET", "glyph_recog_ld": 1.0}, {"polygon": [[220, 148], [286, 162], [286, 182], [212, 166]], "text": "ARBRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARBRE", "recog_valid": true, "glyph_recog_text": "ARBRE", "glyph_recog_ld": 1.0}, {"polygon": [[302, 167], [328, 171], [339, 164], [349, 176], [376, 182], [378, 199], [301, 186]], "text": "NAIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NAIME", "recog_valid": true, "glyph_recog_text": "NAIME", "glyph_recog_ld": 1.0}, {"polygon": [[179, 218], [233, 225], [234, 245], [227, 248], [183, 240]], "text": "VOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VOS", "recog_valid": true, "glyph_recog_text": "vos", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[66, 269], [61, 284], [195, 299], [199, 285]], "text": "ASSOCIATION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASSOciAtion", "recog_valid": false, "glyph_recog_text": "ASSOCIATION", "glyph_recog_ld": 0.45454595041277235}, {"polygon": [[260, 229], [260, 249], [394, 266], [391, 246]], "text": "POUBELLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POUBELLES", "recog_valid": true, "glyph_recog_text": "POUBELLES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387296.jpg", "caption": "british airways boeing 747-400", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518416.jpg", "caption": "a car driving down the street with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125213.jpg", "caption": "a yellow bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518433.jpg", "caption": "a train is parked on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125230.jpg", "caption": "a display of apples and other fruits in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387392.jpg", "caption": "a black and white photo of people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125252.jpg", "caption": "a cell phone, a pen, a watch, and a wallet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125266.jpg", "caption": "a garbage truck parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518487.jpg", "caption": "a blender with a red liquid inside", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125275.jpg", "caption": "a bunch of bananas on a table", "annotations": [{"polygon": [[391, 113], [452, 108], [479, 115], [461, 191], [444, 200], [379, 196], [385, 137]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "26", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518521.jpg", "caption": "two stuffed animals sit on a bed with a pillow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518522.jpg", "caption": "a train traveling down a country road with a church in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256384.jpg", "caption": "a living room with a television, a bird cage, and a baseball jersey", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000518546.jpg", "caption": "a train is parked at a station with a planter", "annotations": [{"polygon": [[103, 217], [23, 214], [25, 277], [103, 268]], "text": "BELLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BELLE", "recog_valid": true, "glyph_recog_text": "BELLE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387480.jpg", "caption": "a baseball game with a batter and catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125341.jpg", "caption": "a man is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125353.jpg", "caption": "a banana and a bottle of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256431.jpg", "caption": "a woman in a long coat and hat walking down a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387512.jpg", "caption": "a white truck with the words associated buyers on it", "annotations": [{"polygon": [[362, 188], [363, 216], [489, 214], [488, 185]], "text": "BUYERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BUYERS", "recog_valid": true, "glyph_recog_text": "BUYERS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000256444.jpg", "caption": "a cell phone and a book on a bed", "annotations": [{"polygon": [[10, 209], [9, 222], [24, 225], [49, 233], [91, 241], [122, 242], [132, 228], [108, 228], [87, 224], [71, 219], [36, 211]], "text": "WRING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WIINC", "recog_valid": false, "glyph_recog_text": "WRING", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000387517.jpg", "caption": "a desk with a laptop and a lamp on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000125374.jpg", "caption": "a man holding a snowboard on a snowy slope", "annotations": [{"polygon": [[135, 132], [173, 129], [190, 332], [167, 334]], "text": "HEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CVEH", "recog_valid": false, "glyph_recog_text": "工山>", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[260, 193], [360, 175], [365, 154], [271, 175]], "text": "AUSTRALIA'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AUSTRALIA'S", "recog_valid": true, "glyph_recog_text": "AUSTRALIA'S", "glyph_recog_ld": 1.0}, {"polygon": [[375, 170], [482, 151], [485, 129], [381, 150]], "text": "OVERSEAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OVERSEAS", "recog_valid": true, "glyph_recog_text": "OVERSEAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395013.jpg", "caption": "two police officers on motorcycles with american flags", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001815.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000100124.jpg", "caption": "two men standing in front of a large airplane", "annotations": [{"polygon": [[271, 146], [293, 198], [169, 198], [173, 148]], "text": "PIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PIA", "recog_valid": true, "glyph_recog_text": "PIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000558876.jpg", "caption": "a double decker bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000362282.jpg", "caption": "a man in a wheelchair is sitting under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000362309.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000362318.jpg", "caption": "a woman with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329553.jpg", "caption": "a white dove perched on a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001875.jpg", "caption": "a woman with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000034645.jpg", "caption": "a black and white photo of a man riding a horse", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395096.jpg", "caption": "a motorcycle with a colorful scarf tied to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526172.jpg", "caption": "a man on a motorcycle riding down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000493407.jpg", "caption": "a woman in a white coat is walking a white cow", "annotations": [{"polygon": [[260, 317], [280, 305], [289, 289], [300, 272], [288, 261], [280, 268], [273, 286], [252, 300]], "text": "610", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "610", "recog_valid": true, "glyph_recog_text": "610", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526187.jpg", "caption": "a clock tower on the side of a building overlooking the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000001911.jpg", "caption": "a man pulling a sled with dogs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000132991.jpg", "caption": "a group of people in suits and ties standing on steps", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427920.jpg", "caption": "two beach chairs with umbrellas on the sand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000231315.jpg", "caption": "a police officer on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000231320.jpg", "caption": "a wall with clocks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000493477.jpg", "caption": "a fire hydrant is on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165803.jpg", "caption": "a man cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000460722.jpg", "caption": "a man sitting on a couch", "annotations": [{"polygon": [[171, 450], [173, 482], [201, 482], [209, 482], [235, 479], [237, 456], [211, 451], [201, 450]], "text": "Dollars", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Dollars", "recog_valid": true, "glyph_recog_text": "Dollars", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165820.jpg", "caption": "a baseball player in the outfield", "annotations": [{"polygon": [[117, 19], [117, 19], [268, 20], [268, 74], [264, 77], [126, 77], [115, 30]], "text": "WLMI", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WLMI", "recog_valid": true, "glyph_recog_text": "WLMI", "glyph_recog_ld": 1.0}, {"polygon": [[298, 20], [389, 17], [401, 36], [398, 50], [384, 77], [293, 76], [286, 39], [286, 30]], "text": "92.9", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "929", "recog_valid": false, "glyph_recog_text": "92.9", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329664.jpg", "caption": "a baseball player standing on a field with a bat", "annotations": [{"polygon": [[50, 158], [39, 204], [102, 205], [100, 159]], "text": "AT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AT", "recog_valid": true, "glyph_recog_text": "AT", "glyph_recog_ld": 1.0}, {"polygon": [[114, 162], [112, 204], [198, 204], [196, 160]], "text": "BAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BAT", "recog_valid": true, "glyph_recog_text": "BAT", "glyph_recog_ld": 1.0}, {"polygon": [[248, 162], [247, 204], [371, 205], [371, 160]], "text": "BALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BALL", "recog_valid": true, "glyph_recog_text": "BALL", "glyph_recog_ld": 1.0}, {"polygon": [[431, 162], [431, 208], [510, 209], [510, 161]], "text": "STR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STF", "recog_valid": false, "glyph_recog_text": "STR", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[73, 214], [72, 283], [163, 282], [164, 214]], "text": "25", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "25", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000493505.jpg", "caption": "a bed with a canopy and a tapestry hanging on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000231373.jpg", "caption": "a person holding two bananas in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526292.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[148, 195], [242, 185], [242, 207], [239, 212], [140, 220]], "text": "ARTICULO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ARTICULO", "recog_valid": true, "glyph_recog_text": "ARTICULO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427990.jpg", "caption": "a fighter jet flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002007.jpg", "caption": "a woman is playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000427992.jpg", "caption": "a street sign with a street name and a street name", "annotations": [{"polygon": [[180, 362], [180, 362], [320, 356], [328, 363], [331, 387], [323, 394], [198, 399], [182, 401], [175, 400], [172, 391], [173, 369], [176, 364]], "text": "SEDROC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SEDROC", "recog_valid": true, "glyph_recog_text": "SEDROC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526301.jpg", "caption": "a parking meter with a time on it", "annotations": [{"polygon": [[277, 220], [309, 224], [304, 254], [270, 249]], "text": "00:", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "06", "recog_valid": false, "glyph_recog_text": "00:", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[317, 223], [311, 253], [344, 256], [349, 227]], "text": ";29", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "29", "recog_valid": false, "glyph_recog_text": ";29", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165893.jpg", "caption": "a black truck with smoke coming out of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526350.jpg", "caption": "a woman sitting on a bench with a towel over her head", "annotations": [{"polygon": [[385, 444], [399, 401], [350, 392], [327, 435]], "text": "N", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "N", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395292.jpg", "caption": "a family sitting at a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264233.jpg", "caption": "a man and woman sitting on boxes in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000165955.jpg", "caption": "a long narrow hallway with books on shelves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526403.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[164, 238], [196, 241], [197, 268], [167, 265]], "text": "33", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "33", "recog_valid": true, "glyph_recog_text": "33", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428109.jpg", "caption": "a bunch of bananas with a sign that says free", "annotations": [{"polygon": [[147, 285], [135, 248], [271, 204], [283, 251]], "text": "FREE !", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FREE", "recog_valid": false, "glyph_recog_text": "FREE !", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[104, 356], [142, 312], [172, 334], [130, 388]], "text": "have ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "hone", "recog_valid": false, "glyph_recog_text": "have", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[163, 316], [175, 299], [196, 313], [181, 334]], "text": "a", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "d", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[153, 394], [224, 305], [255, 325], [182, 415]], "text": "banana", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "banana", "recog_valid": true, "glyph_recog_text": "banana", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559186.jpg", "caption": "dixie hay next signal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133208.jpg", "caption": "a stop sign and street signs on a pole", "annotations": [{"polygon": [[248, 323], [363, 328], [361, 376], [246, 375]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[254, 220], [311, 231], [311, 255], [254, 245]], "text": "BRIDGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BRIDGE", "recog_valid": true, "glyph_recog_text": "BRIDGE", "glyph_recog_ld": 1.0}, {"polygon": [[260, 200], [323, 178], [325, 203], [260, 225]], "text": "RAILROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PTIRCAD", "recog_valid": false, "glyph_recog_text": "RALROAD", "glyph_recog_ld": 0.428572244896793}, {"polygon": [[209, 223], [211, 287], [246, 277], [255, 254], [255, 211], [250, 206]], "text": "29", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "之", "recog_valid": false, "glyph_recog_text": "-N O)", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[209, 136], [205, 182], [212, 181], [250, 160], [253, 109]], "text": "JCT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "→0", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000329817.jpg", "caption": "a group of people playing soccer on a field", "annotations": [{"polygon": [[313, 219], [311, 249], [374, 249], [378, 249], [376, 220]], "text": "ITAU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Itau", "recog_valid": false, "glyph_recog_text": "ITAU", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[425, 224], [426, 252], [489, 254], [490, 226]], "text": "ITAU", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Itau", "recog_valid": false, "glyph_recog_text": "ITAU", "glyph_recog_ld": 0.25000187499531246}, {"polygon": [[464, 136], [466, 174], [512, 174], [513, 135]], "text": "Fo", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FU", "recog_valid": false, "glyph_recog_text": "Fo", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[1, 127], [0, 165], [226, 168], [227, 131], [68, 127]], "text": "JOVEM", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "JOET", "recog_valid": false, "glyph_recog_text": "JOVEM", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000100468.jpg", "caption": "a stop sign in front of a house", "annotations": [{"polygon": [[125, 262], [123, 298], [195, 298], [206, 267]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[215, 246], [215, 284], [273, 285], [279, 264], [274, 248]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[282, 250], [294, 285], [383, 286], [386, 250]], "text": "THRU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THRU", "recog_valid": true, "glyph_recog_text": "THRU", "glyph_recog_ld": 1.0}, {"polygon": [[384, 294], [385, 324], [227, 319], [216, 292]], "text": "TRAFFIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRAFFIC", "recog_valid": true, "glyph_recog_text": "TRAFFIC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198785.jpg", "caption": "three women posing on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166018.jpg", "caption": "a living room with a fireplace and lots of boxes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264340.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[348, 75], [347, 84], [363, 104], [426, 104], [426, 74], [369, 67]], "text": "lcre", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "slcre", "recog_valid": false, "glyph_recog_text": "lcre", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264365.jpg", "caption": "the bear and the coffee table", "annotations": [{"polygon": [[371, 299], [380, 312], [480, 284], [472, 274]], "text": "BRYSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BRYSON", "recog_valid": true, "glyph_recog_text": "BRYSON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428208.jpg", "caption": "a bunch of containers filled with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133298.jpg", "caption": "a tennis racket and newspaper on display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395444.jpg", "caption": "a rabbit eating a banana", "annotations": [{"polygon": [[176, 225], [176, 225], [201, 268], [190, 279], [281, 327], [279, 344], [261, 343], [210, 322], [162, 284], [150, 265], [158, 244]], "text": "WELCOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Mi ccoms", "recog_valid": false, "glyph_recog_text": "WELCOME", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[293, 332], [293, 332], [364, 342], [372, 349], [368, 366], [321, 363], [285, 355]], "text": "HOME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HOAE", "recog_valid": false, "glyph_recog_text": "HOME", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[228, 348], [246, 355], [273, 362], [294, 364], [295, 372], [290, 385], [262, 377], [249, 375], [226, 365]], "text": "OERET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "专保住", "recog_valid": false, "glyph_recog_text": "OERET", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067768.jpg", "caption": "a train traveling down the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000231631.jpg", "caption": "a market with umbrellas and tables with fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035037.jpg", "caption": "a man on a skateboard doing a trick in a parking garage", "annotations": [{"polygon": [[351, 156], [394, 153], [400, 230], [359, 232]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461027.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395495.jpg", "caption": "a large jet airplane flying through a clear blue sky", "annotations": [{"polygon": [[252, 236], [261, 241], [237, 270], [231, 265]], "text": "QATAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aVIVO", "recog_valid": false, "glyph_recog_text": "QATAR", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067838.jpg", "caption": "a boy riding a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000526599.jpg", "caption": "a man drinking water from a bottle in a market", "annotations": [{"polygon": [[5, 154], [31, 170], [40, 143], [12, 135]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "c", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395527.jpg", "caption": "a group of students standing in a park talking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000067849.jpg", "caption": "a man in a black boat on a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198936.jpg", "caption": "a woman sitting on a couch with a dog", "annotations": [{"polygon": [[201, 236], [190, 289], [171, 296], [151, 265], [157, 250]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "%", "recog_valid": false, "glyph_recog_text": "Y", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[152, 212], [163, 249], [178, 248], [198, 213], [181, 203], [166, 203]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": ">", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[153, 182], [160, 188], [153, 228], [133, 224], [127, 191]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": ">", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[126, 236], [136, 260], [161, 261], [165, 245], [161, 226], [152, 221]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "9", "recog_valid": false, "glyph_recog_text": "<", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264473.jpg", "caption": "two cats standing next to a car", "annotations": [{"polygon": [[234, 165], [286, 165], [286, 198], [235, 200]], "text": "NX", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NX", "recog_valid": true, "glyph_recog_text": "NX", "glyph_recog_ld": 1.0}, {"polygon": [[302, 165], [344, 164], [344, 198], [301, 196]], "text": "- .83", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "83", "recog_valid": false, "glyph_recog_text": "-.83", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[357, 164], [399, 162], [399, 196], [359, 196]], "text": "76", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "76", "recog_valid": true, "glyph_recog_text": "76", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035101.jpg", "caption": "a plane parked at the airport at sunset", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461086.jpg", "caption": "a horse drawn carriage in front of a large building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000198974.jpg", "caption": "a green motorcycle with a helmet on it", "annotations": [{"polygon": [[229, 303], [229, 290], [208, 273], [191, 281], [198, 289], [207, 294], [199, 295], [206, 299], [214, 298]], "text": "Ninja", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "siafa", "recog_valid": false, "glyph_recog_text": "Ninja", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002374.jpg", "caption": "a pizza with a card on it that says bing pizza", "annotations": [{"polygon": [[366, 356], [360, 387], [448, 393], [452, 354]], "text": "BING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BING", "recog_valid": true, "glyph_recog_text": "BING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166230.jpg", "caption": "two pictures of a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002402.jpg", "caption": "a group of people standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166275.jpg", "caption": "a basket of bananas and a watermelon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428420.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133509.jpg", "caption": "a pair of scissors and a bag of leaves", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297359.jpg", "caption": "two children sitting on a carpet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000100757.jpg", "caption": "a large airplane with a cartoon on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199063.jpg", "caption": "a white sign on a brick wall", "annotations": [{"polygon": [[24, 241], [23, 209], [115, 211], [116, 241]], "text": "KING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KING", "recog_valid": true, "glyph_recog_text": "KING", "glyph_recog_ld": 1.0}, {"polygon": [[131, 241], [131, 209], [304, 210], [305, 242]], "text": "HARALD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARALD", "recog_valid": true, "glyph_recog_text": "HARALD", "glyph_recog_ld": 1.0}, {"polygon": [[324, 240], [323, 211], [475, 213], [474, 243]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000493978.jpg", "caption": "a stop sign with a 4-way sign on it", "annotations": [{"polygon": [[202, 139], [367, 166], [368, 220], [187, 192]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[205, 273], [233, 278], [226, 318], [195, 312]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "寸", "recog_valid": false, "glyph_recog_text": "v", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[255, 281], [341, 294], [337, 336], [251, 322]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000231840.jpg", "caption": "a street sign and a light pole", "annotations": [{"polygon": [[226, 213], [226, 213], [331, 160], [338, 174], [337, 175], [231, 226], [232, 215]], "text": "Talbot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Talbot", "recog_valid": true, "glyph_recog_text": "Talbot", "glyph_recog_ld": 1.0}, {"polygon": [[145, 359], [239, 326], [240, 335], [141, 369]], "text": "Baltimore", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "aaitimore", "recog_valid": false, "glyph_recog_text": "MAlLiORIe", "glyph_recog_ld": 0.22222308641879285}, {"polygon": [[228, 351], [296, 328], [296, 339], [227, 362]], "text": "LANES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LANES", "recog_valid": true, "glyph_recog_text": "工h陆艺5", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000559522.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[425, 214], [407, 231], [422, 246], [447, 229]], "text": "w", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002466.jpg", "caption": "a woman sitting at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000493996.jpg", "caption": "a stop sign with a picture of a shark", "annotations": [{"polygon": [[212, 153], [219, 159], [280, 188], [286, 191], [288, 167], [275, 158], [260, 149], [239, 137], [215, 129]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000362936.jpg", "caption": "a group of people on a safari vehicle with elephants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035265.jpg", "caption": "two baseball players are holding bats in the air", "annotations": [{"polygon": [[100, 305], [88, 371], [142, 385], [155, 317]], "text": "21", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "h", "recog_valid": false, "glyph_recog_text": "忆", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000362973.jpg", "caption": "two trucks parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428534.jpg", "caption": "a cat sitting on a chair in front of a menu board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000395780.jpg", "caption": "a living room with green walls and a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363014.jpg", "caption": "a baseball player in a white uniform holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002567.jpg", "caption": "a woman holding a frisbee in a gymnasium", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363028.jpg", "caption": "a man and a woman sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199196.jpg", "caption": "a stop sign with one way and one way signs", "annotations": [{"polygon": [[175, 60], [228, 70], [228, 106], [174, 96]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[242, 74], [296, 89], [292, 124], [244, 116]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[162, 182], [220, 175], [222, 208], [161, 216]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[235, 171], [303, 163], [300, 200], [238, 205]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[174, 300], [341, 287], [341, 378], [174, 390]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133672.jpg", "caption": "a pizza in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002623.jpg", "caption": "two boats docked at a dock with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199234.jpg", "caption": "two men playing a video game", "annotations": [{"polygon": [[183, 268], [213, 255], [236, 262], [231, 241], [212, 225], [185, 232]], "text": "ALIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALlS", "recog_valid": false, "glyph_recog_text": "ALIS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000428619.jpg", "caption": "a young boy in a baseball uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000264797.jpg", "caption": "an old truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035422.jpg", "caption": "a sign that says yocante park", "annotations": [{"polygon": [[191, 132], [202, 167], [377, 160], [374, 125]], "text": "YOCTANGEE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOCTANGEE", "recog_valid": true, "glyph_recog_text": "YOCTANGEE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002658.jpg", "caption": "a man in a yellow shirt and black pants is walking with other men", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494199.jpg", "caption": "several green peppers on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297600.jpg", "caption": "a group of men playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000101011.jpg", "caption": "a street sign with a fish on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166556.jpg", "caption": "two men in the water with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068261.jpg", "caption": "a street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494248.jpg", "caption": "a refrigerator with drawers and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035497.jpg", "caption": "a train is pulling into a station with a train behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461484.jpg", "caption": "a group of skiers racing down a snowy slope", "annotations": [{"polygon": [[0, 206], [0, 230], [41, 207], [37, 186]], "text": "MANN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MANK", "recog_valid": false, "glyph_recog_text": "WANN", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494264.jpg", "caption": "a clock with a glass dome and a clock face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002755.jpg", "caption": "a group of kids running on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035529.jpg", "caption": "a shelf with a lot of luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000101079.jpg", "caption": "a man playing a video game", "annotations": [{"polygon": [[382, 334], [358, 324], [352, 311], [341, 291], [340, 279], [354, 289], [360, 305], [379, 315]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ea", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461545.jpg", "caption": "a man holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363247.jpg", "caption": "a remote control and a dvd player", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000133885.jpg", "caption": "magic bullet blender", "annotations": [{"polygon": [[220, 64], [322, 65], [321, 106], [220, 107]], "text": "maGic", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "maGiC", "recog_valid": false, "glyph_recog_text": "maGic", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[127, 96], [132, 113], [187, 93], [185, 81]], "text": "RECIDES", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "RECIPES", "recog_valid": false, "glyph_recog_text": "RECIDES", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035589.jpg", "caption": "a baseball game in progress with a batter at bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000101132.jpg", "caption": "a kitchen with a stove and a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494351.jpg", "caption": "a dirty toilet with a box of toilet paper next to it", "annotations": [{"polygon": [[445, 370], [464, 375], [509, 396], [508, 408], [476, 397], [467, 394], [448, 388], [438, 384]], "text": "Advance", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Advanc", "recog_valid": false, "glyph_recog_text": "Advance", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[439, 387], [438, 401], [481, 417], [485, 412], [482, 404], [465, 394], [451, 389]], "text": "Litho", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Litho", "recog_valid": true, "glyph_recog_text": "Litho", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461600.jpg", "caption": "a cat laying on a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297765.jpg", "caption": "a man cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461609.jpg", "caption": "a parking meter with a solar panel on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396073.jpg", "caption": "a view of a traffic light through a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166699.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396082.jpg", "caption": "a man wearing a horse mask", "annotations": [{"polygon": [[149, 450], [145, 489], [195, 480], [195, 449]], "text": "KAT16", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "人村", "recog_valid": false, "glyph_recog_text": "KAT16", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[204, 449], [197, 496], [271, 489], [268, 457]], "text": "ANDERSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AYdney", "recog_valid": false, "glyph_recog_text": "NIOERSON", "glyph_recog_ld": 1.249998437424793e-06}, {"polygon": [[281, 437], [271, 486], [360, 493], [362, 461]], "text": "PhotogRAphy", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "cllengely", "recog_valid": false, "glyph_recog_text": "PhotogfRAptmy", "glyph_recog_ld": 0.1538468047332271}, {"polygon": [[114, 447], [138, 441], [114, 415], [90, 425]], "text": "2:34:56", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "S6an", "recog_valid": false, "glyph_recog_text": "2.34:56", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068420.jpg", "caption": "a pole with a stop sign, a street sign, and a traffic sign", "annotations": [{"polygon": [[249, 393], [277, 391], [280, 395], [280, 402], [280, 406], [277, 408], [274, 408], [274, 422], [249, 425], [246, 423], [245, 416], [245, 404], [245, 398], [246, 395]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494415.jpg", "caption": "a group of people walking in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396116.jpg", "caption": "a clock on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363353.jpg", "caption": "a woman walking a horse in a yellow jacket", "annotations": [{"polygon": [[92, 223], [183, 243], [179, 273], [158, 271], [89, 253]], "text": "RIANAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANAR", "recog_valid": false, "glyph_recog_text": "RIANAR", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396150.jpg", "caption": "a woman holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000101243.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[13, 88], [223, 79], [218, 113], [11, 118]], "text": "Unibaseball.org", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Unibaseball.org", "recog_valid": true, "glyph_recog_text": "Unibaseball.org", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396159.jpg", "caption": "a bus parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000232322.jpg", "caption": "a hand holding a camera in front of a display", "annotations": [{"polygon": [[0, 110], [70, 120], [75, 142], [0, 132]], "text": "W53CA", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "N53CA", "recog_valid": false, "glyph_recog_text": "W53CA", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[401, 178], [393, 210], [427, 223], [437, 187]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "P", "recog_valid": true, "glyph_recog_text": "a", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035715.jpg", "caption": "a pizza on a pan", "annotations": [{"polygon": [[85, 496], [85, 467], [108, 460], [122, 461], [137, 460], [149, 467], [245, 466], [256, 460], [260, 461], [264, 471], [263, 487], [258, 489], [215, 488], [190, 497], [182, 487], [110, 489], [95, 497]], "text": "yikhug.net", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "gikiting.ncl", "recog_valid": false, "glyph_recog_text": "yikhug.net", "glyph_recog_ld": 0.41666715277737265}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363410.jpg", "caption": "a large stainless steel refrigerator with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000002964.jpg", "caption": "a group of wine bottles on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330649.jpg", "caption": "a bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527277.jpg", "caption": "a baseball player is swinging a bat at a ball", "annotations": [{"polygon": [[37, 201], [36, 231], [122, 234], [123, 204]], "text": "Jeep", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Jeen", "recog_valid": false, "glyph_recog_text": "Jeep", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[412, 195], [415, 225], [511, 224], [511, 193]], "text": "DESI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DESI", "recog_valid": true, "glyph_recog_text": "DESI", "glyph_recog_ld": 1.0}, {"polygon": [[277, 195], [288, 226], [396, 224], [396, 215], [397, 210], [395, 197], [388, 195]], "text": "YARD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YARD", "recog_valid": true, "glyph_recog_text": "YARD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560047.jpg", "caption": "a banana, an orange and a smiley face on a wall", "annotations": [{"polygon": [[247, 447], [247, 388], [491, 390], [493, 448]], "text": "chillin", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "chillin", "recog_valid": true, "glyph_recog_text": "chillin", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265139.jpg", "caption": "a man playing tennis on a court with a ball", "annotations": [{"polygon": [[83, 182], [199, 178], [200, 232], [81, 234]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[223, 175], [511, 166], [511, 229], [222, 232]], "text": "CHAMPION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHAMPI", "recog_valid": false, "glyph_recog_text": "CHAMPION", "glyph_recog_ld": 0.7500003124996093}, {"polygon": [[276, 272], [511, 274], [511, 319], [278, 317]], "text": "LISAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AIISADI", "recog_valid": false, "glyph_recog_text": "LISAD", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035783.jpg", "caption": "a stop sign with a cloudy sky behind it", "annotations": [{"polygon": [[294, 207], [363, 208], [367, 241], [290, 239]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000035796.jpg", "caption": "a fire hydrant is sitting in front of a building", "annotations": [{"polygon": [[176, 62], [176, 88], [342, 95], [344, 68]], "text": "HYDRANT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HYDRANT", "recog_valid": true, "glyph_recog_text": "HYDRANT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265173.jpg", "caption": "a woman walking down a street near a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000461794.jpg", "caption": "a man playing a video game on a wii", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265187.jpg", "caption": "a man is standing outside of a pink building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000101355.jpg", "caption": "a man is making a drink in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000297967.jpg", "caption": "a bus driving down a street with a hill in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166903.jpg", "caption": "a stop sign is in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396284.jpg", "caption": "chicken thighs, carrots, onions, celery, and garlic", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396321.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[260, 196], [259, 234], [373, 237], [374, 198]], "text": "ORD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ORD", "recog_valid": true, "glyph_recog_text": "ORD", "glyph_recog_ld": 1.0}, {"polygon": [[385, 200], [383, 237], [512, 241], [512, 203]], "text": "MINN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MINI", "recog_valid": false, "glyph_recog_text": "MINN", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[0, 157], [0, 180], [35, 180], [34, 159], [24, 150]], "text": "nk", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "nk", "recog_valid": true, "glyph_recog_text": "nk", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429096.jpg", "caption": "a woman is standing on a sidewalk talking on her cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000232500.jpg", "caption": "a large airplane parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330806.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[216, 190], [240, 208], [242, 214], [227, 228], [218, 223], [205, 211]], "text": "28", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "28", "recog_valid": true, "glyph_recog_text": "28", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429111.jpg", "caption": "a table with a laptop, a cell phone, a wallet, a pen, a cell phone charger, a cell phone, a pen, a cell phone charger", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429119.jpg", "caption": "a small vehicle with boxes on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494660.jpg", "caption": "a skateboarder is riding on a ramp", "annotations": [{"polygon": [[20, 357], [30, 392], [97, 384], [81, 352]], "text": "VAAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "VANS", "recog_valid": false, "glyph_recog_text": "VAAS", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[0, 397], [10, 425], [53, 424], [39, 392]], "text": "OFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "OFF", "recog_valid": true, "glyph_recog_text": "OFF", "glyph_recog_ld": 1.0}, {"polygon": [[45, 394], [65, 426], [77, 426], [106, 426], [111, 426], [88, 387]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "营", "recog_valid": false, "glyph_recog_text": "THE", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[92, 387], [118, 423], [126, 420], [134, 415], [140, 409], [148, 400], [150, 397], [136, 379]], "text": "WALL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WA", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330824.jpg", "caption": "a woman swinging a tennis racket at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265292.jpg", "caption": "a plate of food and a glass of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000166997.jpg", "caption": "a person riding a dirt bike on a dirt track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003157.jpg", "caption": "a man holding a tennis racket", "annotations": [{"polygon": [[239, 118], [236, 182], [408, 196], [410, 132]], "text": "EGG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EGG", "recog_valid": true, "glyph_recog_text": "EGG", "glyph_recog_ld": 1.0}, {"polygon": [[435, 124], [428, 197], [511, 202], [510, 126]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}, {"polygon": [[292, 203], [282, 214], [288, 232], [440, 246], [453, 221]], "text": "AL ASSET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AL ASSET", "recog_valid": true, "glyph_recog_text": "AL ASSET", "glyph_recog_ld": 1.0}, {"polygon": [[468, 220], [463, 247], [512, 251], [511, 224]], "text": "MA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MA", "recog_valid": true, "glyph_recog_text": "MA", "glyph_recog_ld": 1.0}, {"polygon": [[425, 342], [421, 391], [463, 394], [467, 342]], "text": " w", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "11", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560217.jpg", "caption": "a cat playing with a toy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330862.jpg", "caption": "a motorcycle racer is racing down a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494711.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134271.jpg", "caption": "a man sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298112.jpg", "caption": "a brown jacket with a tie with the words will you kunt?", "annotations": [{"polygon": [[216, 252], [312, 253], [303, 306], [210, 307]], "text": "knot?", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "knet?", "recog_valid": false, "glyph_recog_text": "knot?", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[221, 236], [223, 262], [257, 277], [260, 249], [245, 238]], "text": "why", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wly", "recog_valid": false, "glyph_recog_text": "why", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560270.jpg", "caption": "a group of police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494749.jpg", "caption": "a car driving down a street with a stop sign and a bus driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134302.jpg", "caption": "a man is standing next to a large truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429241.jpg", "caption": "a group of people standing on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298170.jpg", "caption": "a train traveling down the tracks with a few cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560325.jpg", "caption": "a black steam engine train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298182.jpg", "caption": "orange bike with white wheels on brick sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000330952.jpg", "caption": "a group of people walking in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134344.jpg", "caption": "a woman with sunglasses", "annotations": [{"polygon": [[0, 372], [104, 366], [104, 392], [0, 407]], "text": "TENTION", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "TENTION", "recog_valid": true, "glyph_recog_text": "TENTION", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000068817.jpg", "caption": "a train with a green and purple stripe on it", "annotations": [{"polygon": [[168, 304], [222, 312], [224, 304], [228, 296], [230, 301], [227, 306], [226, 310], [229, 309], [233, 313], [243, 316], [242, 336], [168, 320]], "text": "iNTERCiTeS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NTERCT2s", "recog_valid": false, "glyph_recog_text": "INTERCTeS", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[283, 317], [289, 315], [294, 315], [340, 322], [339, 326], [334, 336], [326, 335], [324, 345], [279, 336], [279, 334], [281, 322]], "text": "SNCF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SNCF", "recog_valid": true, "glyph_recog_text": "SNCF", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036068.jpg", "caption": "a variety of fruits and vegetables are displayed in crates", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462062.jpg", "caption": "a dog is walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396536.jpg", "caption": "a tow truck is driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462075.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[145, 278], [140, 297], [166, 309], [175, 284]], "text": "55", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "55", "recog_valid": true, "glyph_recog_text": "55", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429309.jpg", "caption": "a dog laying in a bed next to a computer desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167169.jpg", "caption": "a female softball player in action on the field", "annotations": [{"polygon": [[264, 182], [252, 231], [295, 244], [306, 194]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "13", "recog_valid": true, "glyph_recog_text": "2", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429312.jpg", "caption": "a living room with a couch, a tv, and a water bottle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000494860.jpg", "caption": "a man is playing tennis on a blue court", "annotations": [{"polygon": [[428, 114], [478, 150], [502, 147], [454, 112]], "text": "MADRIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MADRO", "recog_valid": false, "glyph_recog_text": "MADRIO", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000527628.jpg", "caption": "two women walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199956.jpg", "caption": "a stop sign and street signs on a street corner", "annotations": [{"polygon": [[39, 174], [38, 213], [125, 210], [133, 170]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[56, 63], [56, 71], [101, 100], [108, 100], [107, 89], [72, 64]], "text": "DGE WOOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "MIOGEWOO", "recog_valid": false, "glyph_recog_text": "DOE WOOD", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396581.jpg", "caption": "a red bench in front of a shop window", "annotations": [{"polygon": [[206, 120], [199, 164], [282, 161], [291, 129], [291, 129]], "text": "Bird", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3m", "recog_valid": false, "glyph_recog_text": "Bird", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462123.jpg", "caption": "a group of men sitting on motorcycles in a dirt area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560431.jpg", "caption": "a man holding a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000232755.jpg", "caption": "a black and white photo of a truck with a house on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363829.jpg", "caption": "a man in white playing tennis", "annotations": [{"polygon": [[374, 172], [393, 192], [371, 208], [365, 206], [356, 195], [356, 186]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "W", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000199992.jpg", "caption": "a man holding a box of hot dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462151.jpg", "caption": "a man in a white apron", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134479.jpg", "caption": "a young man in a blue hat is playing tennis", "annotations": [{"polygon": [[258, 249], [271, 262], [316, 243], [306, 228]], "text": "TENNIS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NNIS", "recog_valid": false, "glyph_recog_text": "TENNIS", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134486.jpg", "caption": "a boy is feeding a giraffe at the zoo", "annotations": [{"polygon": [[100, 414], [130, 384], [178, 370], [220, 387], [228, 413], [228, 427], [211, 428], [183, 401], [147, 407], [119, 432]], "text": "EROPOSTAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Loease", "recog_valid": false, "glyph_recog_text": "EROPOSTAL", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134502.jpg", "caption": "a sticker on a stop sign", "annotations": [{"polygon": [[145, 287], [426, 284], [419, 179], [154, 182]], "text": "ALTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALTO", "recog_valid": true, "glyph_recog_text": "ALTO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000331119.jpg", "caption": "a mother elephant and her baby elephant walking in the sand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429446.jpg", "caption": "a man jumping in the air to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265625.jpg", "caption": "a man and woman sitting at a table with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265645.jpg", "caption": "an orange train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167348.jpg", "caption": "a young girl holding a tennis ball and a racket", "annotations": [{"polygon": [[173, 333], [195, 295], [190, 262], [184, 294], [165, 324]], "text": "on", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "on", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[157, 310], [166, 323], [186, 294], [188, 255], [173, 231], [180, 264], [173, 285]], "text": "OPEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOPEN", "recog_valid": false, "glyph_recog_text": "N3d0", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036281.jpg", "caption": "a woman is teaching children about toothbrushes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000363987.jpg", "caption": "a young boy holding a tennis racket and a tennis ball", "annotations": [{"polygon": [[320, 277], [335, 277], [348, 270], [349, 250], [335, 230], [317, 234], [315, 257]], "text": "O2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "02", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462292.jpg", "caption": "a red train with a bicycle on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167397.jpg", "caption": "a group of people standing in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000069117.jpg", "caption": "a white car is parked at a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000331266.jpg", "caption": "a laptop computer sitting on a table with a cup of coffee", "annotations": [{"polygon": [[148, 165], [153, 181], [130, 224], [122, 210]], "text": "SUBWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SUBWA", "recog_valid": false, "glyph_recog_text": "SUBWAY", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003602.jpg", "caption": "westfield police department", "annotations": [{"polygon": [[207, 139], [219, 128], [228, 122], [249, 116], [262, 115], [281, 118], [299, 125], [321, 143], [312, 155], [307, 154], [297, 145], [285, 138], [266, 134], [250, 135], [231, 143], [220, 149]], "text": "WESTERVILLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NESTERVILLE", "recog_valid": false, "glyph_recog_text": "WESTERVILLE", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000200213.jpg", "caption": "two pictures of people with pizza and a woman smiling", "annotations": [{"polygon": [[82, 107], [81, 131], [0, 122], [0, 98]], "text": "MEXICO", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "oixaM", "recog_valid": false, "glyph_recog_text": "MEXICO", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[206, 113], [205, 137], [89, 131], [91, 105]], "text": "Productos", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "eotoubom", "recog_valid": false, "glyph_recog_text": "Productos", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298564.jpg", "caption": "a bag, a book, a purse, a phone, a wallet, a hairbrush, a book, a pen, a book, a purse, a", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167506.jpg", "caption": "a red car parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560723.jpg", "caption": "a street light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036448.jpg", "caption": "a train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000396896.jpg", "caption": "a stop sign and a herd of horses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429688.jpg", "caption": "a soccer player is trying to kick the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036498.jpg", "caption": "a woman in a white shirt and black shorts is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000429727.jpg", "caption": "a stop sign with the word hate written on it", "annotations": [{"polygon": [[80, 215], [80, 215], [78, 306], [433, 307], [437, 219]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233137.jpg", "caption": "a black and white photo of a plane on the ground", "annotations": [{"polygon": [[168, 340], [172, 355], [184, 352], [222, 340], [217, 323]], "text": "SERVIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SERVIC", "recog_valid": true, "glyph_recog_text": "SERVIC", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000102073.jpg", "caption": "a large airplane parked on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134849.jpg", "caption": "a group of people standing around a table with pizza boxes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134858.jpg", "caption": "a man in white playing tennis on a green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000364251.jpg", "caption": "a bicycle and a wooden chair in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000495334.jpg", "caption": "a group of people in a canoe on the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167671.jpg", "caption": "a person sitting in a chair on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000265982.jpg", "caption": "a tall clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036606.jpg", "caption": "a group of people standing around a luggage carousel", "annotations": [{"polygon": [[353, 149], [353, 221], [396, 219], [391, 145]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "寸", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036622.jpg", "caption": "a table with a tray of hot dogs and a bowl of chili", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000298777.jpg", "caption": "a kitchen with a microwave and a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036639.jpg", "caption": "a blue and white van parked next to a stop sign", "annotations": [{"polygon": [[409, 156], [462, 148], [463, 173], [410, 178]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134955.jpg", "caption": "a plate with chocolate cake and a spoon", "annotations": [{"polygon": [[68, 216], [176, 103], [232, 84], [316, 93], [350, 126], [337, 143], [265, 127], [194, 140], [184, 152], [201, 171], [187, 179], [168, 158], [119, 250]], "text": "Congratulations", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cseeiralelon", "recog_valid": false, "glyph_recog_text": "Congratulations", "glyph_recog_ld": 0.3333337777774814}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000134958.jpg", "caption": "a horse and carriage on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000102202.jpg", "caption": "a small train is traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167746.jpg", "caption": "a train with red, white and blue stripes", "annotations": [{"polygon": [[12, 121], [131, 139], [131, 136], [146, 137], [146, 141], [158, 143], [158, 159], [5, 140]], "text": "AGUARAGUE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AGUARAGUE", "recog_valid": true, "glyph_recog_text": "AGUARAGUE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000331588.jpg", "caption": "a woman behind the counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167748.jpg", "caption": "a black bear standing on its hind legs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528198.jpg", "caption": "a truck towing an rv trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233290.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[331, 207], [329, 233], [359, 225], [397, 218], [393, 201], [366, 202]], "text": "Clarion", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Clrrisp", "recog_valid": false, "glyph_recog_text": "Clarion", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000560992.jpg", "caption": "a red motorcycle parked next to a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135015.jpg", "caption": "a boy sitting at a table with a lot of paper and scissors", "annotations": [{"polygon": [[417, 195], [424, 204], [456, 182], [449, 174]], "text": "6003", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "6007", "recog_valid": false, "glyph_recog_text": "2293", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561006.jpg", "caption": "a street with several buses parked on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528243.jpg", "caption": "a woman riding a bicycle on a cobblestone street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000003967.jpg", "caption": "a woman and a child are standing next to a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000364420.jpg", "caption": "our friends last longer than we do", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036757.jpg", "caption": "a cat sitting on a motorcycle seat", "annotations": [{"polygon": [[177, 356], [276, 350], [282, 376], [185, 381], [184, 373]], "text": "YAMAHA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YAMAHA", "recog_valid": true, "glyph_recog_text": "YAMAHA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397219.jpg", "caption": "a white van driving down a street with a few signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000200619.jpg", "caption": "a red banner hanging over a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000331724.jpg", "caption": "a woman on a cell phone talking to a man selling fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000495568.jpg", "caption": "a group of people standing in the rain with umbrellas", "annotations": [{"polygon": [[338, 124], [328, 149], [275, 155], [296, 134]], "text": "JAZZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZZ", "recog_valid": false, "glyph_recog_text": "JAZZ", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000167892.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000331738.jpg", "caption": "a small biplane flying in the sky with a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462833.jpg", "caption": "a group of people loading bikes onto a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036850.jpg", "caption": "an airplane parked on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561168.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036881.jpg", "caption": "a woman in a red dress is walking on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462872.jpg", "caption": "a train on a bridge", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561186.jpg", "caption": "a cow crossing the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397349.jpg", "caption": "a man riding a motorcycle down a street with a large object on the back", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004138.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[51, 115], [44, 210], [80, 204], [85, 119]], "text": "TYPE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "EXLW", "recog_valid": false, "glyph_recog_text": "->a", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[38, 220], [44, 262], [72, 262], [72, 219]], "text": "H", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "10", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430122.jpg", "caption": "a street with cars and traffic lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299055.jpg", "caption": "two elephants standing in a dirt area", "annotations": [{"polygon": [[466, 413], [466, 334], [507, 334], [505, 413]], "text": "Beck", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "peg", "recog_valid": false, "glyph_recog_text": "m ①", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000561201.jpg", "caption": "a picture of pasta with red peppers and broccoli", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462899.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[143, 170], [144, 200], [259, 195], [255, 165]], "text": "PROTEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PROTF", "recog_valid": false, "glyph_recog_text": "PROTEST", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135237.jpg", "caption": "a man and a woman walking down a street", "annotations": [{"polygon": [[126, 100], [126, 131], [211, 138], [212, 106]], "text": "Nido", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Nido", "recog_valid": true, "glyph_recog_text": "Nido", "glyph_recog_ld": 1.0}, {"polygon": [[235, 268], [237, 301], [323, 302], [324, 271]], "text": "johnob", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JCHNBCE", "recog_valid": false, "glyph_recog_text": "johnob", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135242.jpg", "caption": "a group of people playing a video game", "annotations": [{"polygon": [[191, 175], [191, 175], [201, 194], [227, 181], [222, 158], [209, 165]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wi", "recog_valid": false, "glyph_recog_text": "Wii", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135270.jpg", "caption": "a skateboarder in the air over a pile of garbage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000036968.jpg", "caption": "a man putting a pizza in the oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000462957.jpg", "caption": "a group of people standing around a table playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000200814.jpg", "caption": "a woman is reading a book in bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000495731.jpg", "caption": "a crowd of people standing in a street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000200830.jpg", "caption": "a painting of a man taking a picture of a bus", "annotations": [{"polygon": [[323, 301], [324, 327], [325, 327], [404, 314], [405, 297]], "text": "EMPIRE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EMPIRE", "recog_valid": true, "glyph_recog_text": "EMPIRE", "glyph_recog_ld": 1.0}, {"polygon": [[429, 116], [417, 128], [417, 128], [475, 170], [475, 170], [481, 157]], "text": "HOUSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HOUSING", "recog_valid": true, "glyph_recog_text": "HOUSING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168076.jpg", "caption": "a mouse and keyboard on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233617.jpg", "caption": "a fire truck with a hose and other equipment on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397461.jpg", "caption": "a cow laying down on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000528539.jpg", "caption": "a man sitting on the floor with a cat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266418.jpg", "caption": "two men in a boat fishing on a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000102607.jpg", "caption": "a little girl eating broccoli with a doll", "annotations": [{"polygon": [[314, 310], [308, 320], [309, 340], [318, 342], [373, 344], [379, 313]], "text": "groot", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "goot", "recog_valid": false, "glyph_recog_text": "groot", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233690.jpg", "caption": "a woman in a red dress is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000495841.jpg", "caption": "a baseball player holding a bat", "annotations": [{"polygon": [[224, 248], [248, 248], [272, 246], [294, 250], [316, 258], [303, 295], [295, 291], [281, 289], [266, 287], [246, 287], [231, 288]], "text": "TOVAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOVAR", "recog_valid": true, "glyph_recog_text": "TOVAR", "glyph_recog_ld": 1.0}, {"polygon": [[227, 288], [254, 365], [315, 387], [323, 301]], "text": "83", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "83", "recog_valid": true, "glyph_recog_text": "83", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168194.jpg", "caption": "a smoke coming out of a pole in the city", "annotations": [{"polygon": [[455, 218], [456, 243], [504, 249], [501, 224]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}, {"polygon": [[457, 182], [458, 213], [512, 221], [512, 192]], "text": "SAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "SATI", "recog_valid": false, "glyph_recog_text": "SAT", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397606.jpg", "caption": "a cat laying on its back in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135472.jpg", "caption": "a person riding skis down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000332078.jpg", "caption": "a white truck with a dog sitting on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266550.jpg", "caption": "a person is holding a bag of broccoli", "annotations": [{"polygon": [[2, 306], [3, 262], [105, 265], [105, 310]], "text": "Frutas", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Frulas", "recog_valid": false, "glyph_recog_text": "Frutas", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037174.jpg", "caption": "two men standing next to a surfboard repair shop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266557.jpg", "caption": "a bus is parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000069955.jpg", "caption": "a zebra walking in a pen", "annotations": [{"polygon": [[429, 407], [474, 394], [478, 420], [429, 428]], "text": "CS", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "CS", "recog_valid": true, "glyph_recog_text": "cs", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463175.jpg", "caption": "a man sitting at a table with a birthday cake and a birthday hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266569.jpg", "caption": "a stop sign on a road with a red car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299371.jpg", "caption": "a bathroom with a shower and a tub", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135545.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037244.jpg", "caption": "two women are cooking in an oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168331.jpg", "caption": "a microwave and a pile of clothes on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000364941.jpg", "caption": "a man in a green shirt is playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430481.jpg", "caption": "a woman taking a picture of a store sign", "annotations": [{"polygon": [[190, 242], [190, 242], [200, 238], [220, 235], [226, 256], [224, 279], [215, 288], [201, 292], [193, 284], [191, 274], [190, 256]], "text": "CD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "8", "recog_valid": false, "glyph_recog_text": "oc", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233873.jpg", "caption": "a group of snowboards leaning against a wall", "annotations": [{"polygon": [[408, 377], [401, 176], [427, 172], [444, 174], [452, 386], [419, 385]], "text": "NITRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OELIN", "recog_valid": false, "glyph_recog_text": "z--o", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[325, 210], [315, 292], [317, 323], [352, 348], [352, 314], [343, 220]], "text": "NITRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UBM", "recog_valid": false, "glyph_recog_text": "Z--co", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299411.jpg", "caption": "a vintage bathroom with a tub, toilet and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000201114.jpg", "caption": "a man in black and white playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000070062.jpg", "caption": "a sandwich and salad on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004527.jpg", "caption": "a baseball player is throwing the ball to the base", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463284.jpg", "caption": "a woman sitting at a table with a beer and a sailboat in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463285.jpg", "caption": "a display of bananas hanging from a tree", "annotations": [{"polygon": [[387, 394], [380, 399], [407, 438], [415, 434]], "text": "LEIRKULUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "LEIRKULUR", "recog_valid": true, "glyph_recog_text": "", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[302, 379], [296, 384], [321, 418], [328, 414]], "text": "LEIRNULUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LEIRKULUH", "recog_valid": false, "glyph_recog_text": "125406A513", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496065.jpg", "caption": "a suitcase with a sign on it", "annotations": [{"polygon": [[200, 334], [144, 386], [135, 383], [190, 330]], "text": "HOMEGROWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 1.0}, {"polygon": [[280, 114], [287, 178], [311, 177], [306, 113]], "text": "$10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "$10", "recog_valid": true, "glyph_recog_text": "班一。", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000233925.jpg", "caption": "a street sign on the side of a road at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496078.jpg", "caption": "a duck is swimming in the water with a red frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000397777.jpg", "caption": "a man in a wetsuit riding a surfboard on a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299488.jpg", "caption": "a green building with a sign that says just dollar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365027.jpg", "caption": "a teddy bear sitting in the driver's seat of a red car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463334.jpg", "caption": "two men sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365032.jpg", "caption": "a fence with a sign that says sea guy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463354.jpg", "caption": "a young boy wearing skis and a helmet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000102929.jpg", "caption": "a blender filled with blueberries and yogurt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037396.jpg", "caption": "a man pushing a cart full of bananas down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000201252.jpg", "caption": "a group of people walking down the street with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234046.jpg", "caption": "a view of a motorcycle on a road with trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135744.jpg", "caption": "a woman is sitting on a newspaper table with bananas and potatoes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000070211.jpg", "caption": "a cake that has been cut in half", "annotations": [{"polygon": [[145, 159], [200, 154], [204, 169], [191, 188], [158, 193]], "text": "REIIR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "食", "recog_valid": false, "glyph_recog_text": "REIIR", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496196.jpg", "caption": "a white coffee mug", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037458.jpg", "caption": "a street sign with a bird on it", "annotations": [{"polygon": [[143, 235], [297, 200], [283, 160], [135, 192]], "text": "END", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "END", "recog_valid": true, "glyph_recog_text": "END", "glyph_recog_ld": 1.0}, {"polygon": [[26, 309], [475, 202], [509, 271], [33, 385]], "text": "BIRD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRD", "recog_valid": true, "glyph_recog_text": "BIRD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234083.jpg", "caption": "four pictures of food on a counter with a watermelon, peppers, and chicken", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234091.jpg", "caption": "a slice of pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004716.jpg", "caption": "two men playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004719.jpg", "caption": "a plate with a hot dog and chips on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037502.jpg", "caption": "a black cat laying on a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004739.jpg", "caption": "a cow standing on a street in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000004741.jpg", "caption": "a woman riding a horse down a street", "annotations": [{"polygon": [[69, 215], [105, 180], [115, 190], [78, 224]], "text": "IRISH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IRISH", "recog_valid": true, "glyph_recog_text": "IRISH", "glyph_recog_ld": 1.0}, {"polygon": [[112, 174], [119, 184], [178, 129], [170, 120]], "text": "KNITWEAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KNITWEAR", "recog_valid": true, "glyph_recog_text": "RNITWEAR", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529030.jpg", "caption": "bicycles are parked in the snow near a pier", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299676.jpg", "caption": "a woman playing tennis on a tennis court", "annotations": [{"polygon": [[145, 160], [139, 209], [155, 213], [158, 203], [200, 209], [206, 166]], "text": "Grolsch", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6oled", "recog_valid": false, "glyph_recog_text": "Grolsch", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[345, 174], [344, 241], [432, 235], [435, 196], [434, 180]], "text": "Grolsch", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Bobanh", "recog_valid": false, "glyph_recog_text": "Grolsch", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[236, 165], [234, 220], [245, 225], [303, 222], [307, 172]], "text": "Grolsch", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Yoloh", "recog_valid": false, "glyph_recog_text": "Grolscdh", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000266910.jpg", "caption": "a train with people getting off and on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496294.jpg", "caption": "a table with a plate of food and a glass of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496302.jpg", "caption": "a large building with a clock tower and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463567.jpg", "caption": "a group of people on the beach with kites", "annotations": [{"polygon": [[224, 278], [318, 235], [325, 251], [292, 263], [284, 274], [226, 300]], "text": "yRGA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HS2A", "recog_valid": false, "glyph_recog_text": "YRGA", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000430808.jpg", "caption": "a green bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000135907.jpg", "caption": "a kitchen with a center island and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299764.jpg", "caption": "three baseball players standing on the mound talking to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299769.jpg", "caption": "a police car is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000332543.jpg", "caption": "a stop sign with a traffic sign on it", "annotations": [{"polygon": [[209, 192], [211, 239], [257, 235], [288, 235], [306, 232], [322, 202], [320, 187]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365320.jpg", "caption": "a jockey is riding a horse at a race", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496401.jpg", "caption": "a red fire hydrant on the side of the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299794.jpg", "caption": "a woman pushing a cart full of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496404.jpg", "caption": "a man playing a video game", "annotations": [{"polygon": [[93, 317], [125, 274], [134, 291], [102, 332]], "text": "ORPCYATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CAPOAIE", "recog_valid": false, "glyph_recog_text": "ORPCYATE", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000201501.jpg", "caption": "three surfers walking on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365344.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168758.jpg", "caption": "a blue train traveling down a track next to trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000168774.jpg", "caption": "two boys sitting at a table with a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398164.jpg", "caption": "a woman is holding a tennis racket on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529240.jpg", "caption": "a man standing at a podium with a frisbee in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299871.jpg", "caption": "a woman wearing a tennis outfit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103297.jpg", "caption": "a baseball player pitching a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000463753.jpg", "caption": "two pizzas on a table with a plate of vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529317.jpg", "caption": "san jose airport boeing 707-200", "annotations": [{"polygon": [[279, 65], [409, 64], [410, 65], [415, 96], [370, 94], [314, 94], [296, 95], [283, 92]], "text": "SAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SAN", "recog_valid": true, "glyph_recog_text": "SAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000299959.jpg", "caption": "a dog is standing on the back of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365507.jpg", "caption": "a cat sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000070600.jpg", "caption": "a woman standing outside of a store with a bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365527.jpg", "caption": "a man riding a bicycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000332767.jpg", "caption": "a baseball player in a black and blue uniform throwing a ball", "annotations": [{"polygon": [[157, 305], [198, 309], [231, 313], [263, 324], [287, 334], [307, 348], [326, 367], [305, 423], [275, 403], [243, 392], [206, 384], [171, 375], [142, 374], [119, 372], [100, 363]], "text": "EAGLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EAGLE", "recog_valid": false, "glyph_recog_text": "EAGLES", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[202, 459], [230, 404], [287, 433], [255, 474]], "text": "19", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "19", "recog_valid": true, "glyph_recog_text": "19", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005088.jpg", "caption": "a small jet airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000037857.jpg", "caption": "a truck with a bunch of signs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562147.jpg", "caption": "a woman wearing a red scarf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005113.jpg", "caption": "a magazine with a picture of a person holding scissors", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267266.jpg", "caption": "a young child eating food with chopsticks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136200.jpg", "caption": "two women in costumes standing in front of a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234516.jpg", "caption": "a black and white photo of trucks parked outside a paint store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234527.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496682.jpg", "caption": "two sheep standing in a field with a hill in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496683.jpg", "caption": "a stop sign with a green arrow and a sign has texts", "annotations": [{"polygon": [[162, 95], [161, 162], [312, 155], [312, 89]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300079.jpg", "caption": "a street sign with a stop sign and a restaurant sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234550.jpg", "caption": "a group of people standing near a bus", "annotations": [{"polygon": [[397, 205], [450, 191], [452, 170], [398, 182]], "text": "BRIGADA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BRIGADA", "recog_valid": true, "glyph_recog_text": "BRIGADA", "glyph_recog_ld": 1.0}, {"polygon": [[464, 365], [447, 427], [511, 427], [513, 397], [497, 365]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365631.jpg", "caption": "post furniture appliances and appliances", "annotations": [{"polygon": [[53, 140], [50, 164], [239, 195], [238, 172]], "text": "FURNITURE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FURNITURE", "recog_valid": true, "glyph_recog_text": "FURNITURE", "glyph_recog_ld": 1.0}, {"polygon": [[48, 199], [49, 178], [241, 207], [243, 226]], "text": "APPLIANCES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "APPLIANCES", "recog_valid": true, "glyph_recog_text": "APPLIANCES", "glyph_recog_ld": 1.0}, {"polygon": [[54, 277], [52, 297], [151, 308], [153, 289]], "text": "GAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "GAS", "recog_valid": true, "glyph_recog_text": "GAS", "glyph_recog_ld": 1.0}, {"polygon": [[364, 263], [376, 307], [512, 168], [511, 89]], "text": "FURNIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ORNIT", "recog_valid": false, "glyph_recog_text": "FURNI", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[8, 217], [2, 271], [28, 280], [249, 285], [248, 241]], "text": "Poist", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "oist", "recog_valid": false, "glyph_recog_text": "Poist", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431169.jpg", "caption": "a kitchen with a lot of stuff on the counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169026.jpg", "caption": "a bus driving down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398407.jpg", "caption": "a pair of scissors and a stop sign", "annotations": [{"polygon": [[271, 256], [374, 177], [401, 207], [299, 290]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365657.jpg", "caption": "a woman holding a cup of donuts", "annotations": [{"polygon": [[126, -1], [144, 0], [159, 2], [181, 3], [210, 1], [224, 0], [314, 0], [325, 4], [336, 6], [346, 10], [347, 35], [334, 33], [319, 30], [300, 26], [275, 25], [243, 27], [223, 31], [209, 33], [190, 37], [160, 35], [138, 33], [125, 24]], "text": "SPRINKLICIOUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SPRINKLICIOUS", "recog_valid": true, "glyph_recog_text": "SPRINKLICIOUS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000496766.jpg", "caption": "a desk with a computer and a chair in a room", "annotations": [{"polygon": [[143, 309], [143, 309], [176, 290], [180, 299], [142, 323]], "text": "JAPAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JAPAN", "recog_valid": true, "glyph_recog_text": "JAPAN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529541.jpg", "caption": "a man in a coat and boots standing on a boat", "annotations": [{"polygon": [[300, 479], [298, 468], [305, 424], [331, 449], [331, 463], [313, 475]], "text": "Ban", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "ca rcw", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[369, 436], [366, 418], [364, 401], [370, 392], [398, 404], [402, 419]], "text": "Iran", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Ghoaon", "recog_valid": false, "glyph_recog_text": "!", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[368, 480], [365, 473], [368, 457], [388, 448], [397, 460], [393, 466], [388, 472], [379, 480]], "text": "1914", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "0A", "recog_valid": false, "glyph_recog_text": "1914", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234665.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005294.jpg", "caption": "a black and white photo of people sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529602.jpg", "caption": "a small calf is inside of a metal cage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562377.jpg", "caption": "a man in an orange shirt is holding a tennis racket", "annotations": [{"polygon": [[237, 315], [266, 279], [278, 256], [261, 242], [224, 305]], "text": "adidas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "adidas", "recog_valid": true, "glyph_recog_text": "adidas", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431340.jpg", "caption": "a white truck with a sign has texts", "annotations": [{"polygon": [[98, 244], [0, 245], [-1, 298], [97, 292]], "text": "Bre", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "06978", "recog_valid": false, "glyph_recog_text": "Bre", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[119, 250], [117, 304], [252, 326], [255, 260]], "text": "Bread", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Bread", "recog_valid": true, "glyph_recog_text": "Bread", "glyph_recog_ld": 1.0}, {"polygon": [[301, 99], [288, 288], [430, 279], [437, 170]], "text": "Bread", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "程", "recog_valid": false, "glyph_recog_text": " . o", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333040.jpg", "caption": "a baseball player holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234743.jpg", "caption": "a crowd of people standing around a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267516.jpg", "caption": "a dog laying on the ground next to a hat", "annotations": [{"polygon": [[446, 195], [447, 154], [494, 156], [492, 193]], "text": "COIN", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "com", "recog_valid": false, "glyph_recog_text": "COIN", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431376.jpg", "caption": "two buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234769.jpg", "caption": "a pile of toothpaste and toothbrushes on a table", "annotations": [{"polygon": [[172, 442], [211, 363], [231, 374], [195, 445]], "text": "Colgate", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "ne6jo2", "recog_valid": false, "glyph_recog_text": "Colgate", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[399, 399], [403, 404], [371, 430], [365, 426]], "text": "Co", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "-", "recog_valid": false, "glyph_recog_text": ".", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234786.jpg", "caption": "a blue clock with wings and stars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431400.jpg", "caption": "a group of people riding motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103726.jpg", "caption": "three men on horses racing on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005424.jpg", "caption": "a man sitting on a bed with a child", "annotations": [{"polygon": [[331, 226], [345, 235], [346, 246], [354, 251], [373, 260], [365, 282], [359, 277], [350, 271], [325, 253]], "text": "ROKI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Ruc", "recog_valid": false, "glyph_recog_text": "ROKI", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038198.jpg", "caption": "a black suitcase sitting on the seat of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333134.jpg", "caption": "a woman is preparing food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562510.jpg", "caption": "two people riding motorcycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000202066.jpg", "caption": "a bunk bed in a small room with a desk and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169321.jpg", "caption": "a black and white photo of an airplane on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000529787.jpg", "caption": "a dump truck is parked in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000103822.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "annotations": [{"polygon": [[83, 265], [234, 262], [248, 333], [81, 336]], "text": "COH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LOH", "recog_valid": false, "glyph_recog_text": "COH", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[372, 259], [370, 311], [426, 313], [428, 266]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "D", "recog_valid": true, "glyph_recog_text": "D", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300441.jpg", "caption": "a red and green bus parked on a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431516.jpg", "caption": "a lufthansa airplane flying through the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267680.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[292, 214], [295, 227], [311, 219], [378, 180], [367, 169]], "text": "GUARDIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GUARDIA", "recog_valid": true, "glyph_recog_text": "GUARDIA", "glyph_recog_ld": 1.0}, {"polygon": [[176, 304], [174, 321], [210, 348], [211, 328]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000365996.jpg", "caption": "a man sitting on a wall with a skateboard", "annotations": [{"polygon": [[11, 143], [11, 88], [125, 90], [125, 141]], "text": "marplaskate", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "marpla kate", "recog_valid": false, "glyph_recog_text": "narplaskate", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497082.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366025.jpg", "caption": "a blender with carrots in it on a counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333258.jpg", "caption": "a man is playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234956.jpg", "caption": "a stop sign is in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464340.jpg", "caption": "a woman cutting a cake with a knife", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038370.jpg", "caption": "a group of children sitting around a table eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000234978.jpg", "caption": "a group of people riding skateboards down a hill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038380.jpg", "caption": "a group of baseball players walking on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005641.jpg", "caption": "two men standing in a field looking at a tablet", "annotations": [{"polygon": [[54, 348], [122, 337], [155, 336], [173, 338], [171, 368], [149, 368], [119, 369], [93, 373], [60, 382]], "text": "MANNY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "MANNY", "recog_valid": true, "glyph_recog_text": "MANNY", "glyph_recog_ld": 1.0}, {"polygon": [[57, 383], [99, 374], [114, 373], [126, 369], [153, 370], [175, 372], [186, 373], [190, 378], [188, 397], [181, 404], [169, 404], [149, 403], [127, 410], [107, 408], [96, 408], [75, 412], [61, 414], [61, 414]], "text": "PACQUAO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "PACQVA0", "recog_valid": false, "glyph_recog_text": "PACQUAO", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[60, 413], [60, 449], [87, 449], [93, 408]], "text": "O", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "O", "recog_valid": true, "glyph_recog_text": "o", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000398869.jpg", "caption": "a sandwich with lettuce and bread on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136732.jpg", "caption": "two people riding motorcycles on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136736.jpg", "caption": "two trains parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235043.jpg", "caption": "two boxes of donuts", "annotations": [{"polygon": [[185, 105], [185, 105], [218, 64], [221, 63], [231, 69], [215, 86], [192, 111]], "text": "Salted", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SALTE", "recog_valid": false, "glyph_recog_text": "容名号型", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[204, 65], [179, 98], [185, 101], [212, 67]], "text": "LICHTL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "LAGHTT", "recog_valid": false, "glyph_recog_text": "tieHit", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169510.jpg", "caption": "a group of people sitting around a table with bunches of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333356.jpg", "caption": "a group of people standing in line to get on a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267823.jpg", "caption": "a street sign and a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267825.jpg", "caption": "a man holding a child under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000136757.jpg", "caption": "a red fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005688.jpg", "caption": "a beach with umbrellas and people on it", "annotations": [{"polygon": [[0, 157], [3, 162], [20, 155], [44, 149], [58, 156], [64, 155], [67, 147], [66, 136], [56, 129], [48, 117], [21, 133], [1, 148]], "text": "Qutim", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "hntnO", "recog_valid": false, "glyph_recog_text": "Qutim", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267834.jpg", "caption": "a couple of people walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235069.jpg", "caption": "a group of people posing for a photo in front of a van", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235081.jpg", "caption": "a stop sign is on a pole in a parking lot", "annotations": [{"polygon": [[176, 250], [229, 244], [236, 262], [179, 274]], "text": "Dont", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DorN", "recog_valid": false, "glyph_recog_text": "Dont", "glyph_recog_ld": 0.5000012499968749}, {"polygon": [[165, 310], [165, 310], [254, 304], [254, 304], [252, 267], [161, 271]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[176, 309], [190, 333], [261, 309], [257, 296]], "text": "Believing", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "believing", "recog_valid": false, "glyph_recog_text": "Believing", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300629.jpg", "caption": "two pizzas sitting in two boxes on a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333407.jpg", "caption": "a dog laying in a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000169573.jpg", "caption": "a man standing at a produce stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562791.jpg", "caption": "a large clock tower in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497257.jpg", "caption": "rockaway beach, florida, usa, usa, usa, usa, usa, usa, usa, usa,", "annotations": [{"polygon": [[60, 248], [291, 224], [300, 164], [64, 190]], "text": "ROCKAWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROCKAWAY", "recog_valid": true, "glyph_recog_text": "ROCKAWAY", "glyph_recog_ld": 1.0}, {"polygon": [[315, 228], [458, 217], [451, 157], [313, 170]], "text": "BEACH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BEACH", "recog_valid": true, "glyph_recog_text": "BEACH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267885.jpg", "caption": "a group of people standing around an elephant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464494.jpg", "caption": "a computer and a laptop sitting on a wooden desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005755.jpg", "caption": "a yellow fire hydrant and a blue shopping cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071295.jpg", "caption": "a suitcase filled with items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005769.jpg", "caption": "a black and white photo of a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530081.jpg", "caption": "a lunch box with food in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300712.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235177.jpg", "caption": "a woman jumping into the water from a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000202410.jpg", "caption": "a bus with an orange and black paint job", "annotations": [{"polygon": [[371, 419], [362, 445], [499, 444], [506, 417], [455, 414], [418, 414]], "text": "yakobus", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "yakobus", "recog_valid": true, "glyph_recog_text": "yakobus", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399028.jpg", "caption": "a blue stool and a motorcycle parked next to a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104125.jpg", "caption": "a toy airplane on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300738.jpg", "caption": "a blue and yellow bus with a man sitting in the front seat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431818.jpg", "caption": "a stop sign and railroad crossing sign", "annotations": [{"polygon": [[352, 174], [456, 107], [465, 122], [358, 187]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "CROSSING", "glyph_recog_ld": 1.0}, {"polygon": [[364, 99], [378, 91], [402, 129], [393, 144]], "text": "RAIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "RAIL", "recog_valid": true, "glyph_recog_text": "RAIL", "glyph_recog_ld": 1.0}, {"polygon": [[407, 164], [419, 155], [450, 198], [436, 206]], "text": "ROAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ROAD", "recog_valid": true, "glyph_recog_text": "ROAD", "glyph_recog_ld": 1.0}, {"polygon": [[348, 240], [339, 270], [403, 287], [414, 267], [414, 259]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[344, 273], [365, 269], [398, 270], [411, 273], [418, 286], [408, 299], [393, 299], [370, 295], [369, 289], [351, 291], [342, 281]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CENNT", "recog_valid": false, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000267985.jpg", "caption": "a woman and two men working on laptops in a library", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000202465.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[195, 224], [190, 256], [222, 248], [224, 221]], "text": "L", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LD", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[213, 180], [206, 214], [227, 238], [239, 205]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "W", "recog_valid": true, "glyph_recog_text": "W", "glyph_recog_ld": 1.0}, {"polygon": [[249, 206], [314, 187], [311, 217], [250, 235]], "text": "OLN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OLN", "recog_valid": true, "glyph_recog_text": "OLN", "glyph_recog_ld": 1.0}, {"polygon": [[253, 225], [246, 259], [275, 283], [286, 255]], "text": "19", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "X9", "recog_valid": false, "glyph_recog_text": "19", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[88, 471], [92, 511], [347, 511], [390, 454], [405, 432], [407, 417], [404, 395], [392, 384], [379, 383], [338, 394], [276, 407], [220, 425], [165, 439], [126, 445], [114, 448]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[339, 193], [339, 211], [387, 194], [387, 178]], "text": "1900", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1900", "recog_valid": true, "glyph_recog_text": "1900", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497386.jpg", "caption": "a train is parked at a station with people standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497393.jpg", "caption": "a cowboy riding a horse in a rodeo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464629.jpg", "caption": "a black motorcycle parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000005883.jpg", "caption": "a truck driving down a city street with a billboard in the background", "annotations": [{"polygon": [[181, 0], [178, 84], [160, 82], [142, 90], [148, 0]], "text": "HERSHE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TSH", "recog_valid": false, "glyph_recog_text": "工wco.", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000431872.jpg", "caption": "a parking meter with a bicycle on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366342.jpg", "caption": "a green and yellow bus driving down a street", "annotations": [{"polygon": [[359, 214], [360, 232], [455, 222], [453, 201]], "text": "BARGAINS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BARGAINS", "recog_valid": true, "glyph_recog_text": "BARGAINS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464652.jpg", "caption": "a man in a suit and tie standing on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000562960.jpg", "caption": "a woman walking down the street with an umbrella", "annotations": [{"polygon": [[150, 125], [152, 150], [187, 158], [188, 136]], "text": "DELIVERY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEUNER!", "recog_valid": false, "glyph_recog_text": "DERJNVENY", "glyph_recog_ld": 0.44444506172770915}, {"polygon": [[-1, 114], [40, 118], [43, 146], [39, 149], [31, 150], [23, 139], [0, 137]], "text": "ing", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ing", "recog_valid": true, "glyph_recog_text": "ing", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038681.jpg", "caption": "a busy city street with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333613.jpg", "caption": "a man standing in front of a toilet with graffiti on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000497455.jpg", "caption": "a bicycle with an umbrella attached to it", "annotations": [{"polygon": [[200, 429], [164, 389], [177, 383], [217, 428]], "text": "KO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "B o", "recog_valid": false, "glyph_recog_text": "KO", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071507.jpg", "caption": "a cell phone sitting on top of a speaker", "annotations": [{"polygon": [[211, 398], [494, 400], [477, 442], [213, 442]], "text": "DIGITALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DIGITALLY", "recog_valid": true, "glyph_recog_text": "DIGITALLY", "glyph_recog_ld": 1.0}, {"polygon": [[17, 400], [110, 400], [111, 443], [17, 443]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268146.jpg", "caption": "a yellow and blue surfboard on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038771.jpg", "caption": "a toilet with a yellow lid and a hose", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530294.jpg", "caption": "two men in blue shirts and ties standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333687.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[161, -2], [202, 6], [204, 34], [159, 27]], "text": "Royal", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Royal", "recog_valid": true, "glyph_recog_text": "Roya", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[206, 5], [252, 20], [251, 40], [206, 32]], "text": "Crown", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Crown", "recog_valid": true, "glyph_recog_text": "iCrown", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[253, 14], [324, 30], [323, 50], [252, 39]], "text": "Insurance", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Insurance", "recog_valid": true, "glyph_recog_text": "insurance", "glyph_recog_ld": 0.8888890123455419}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300930.jpg", "caption": "a brick building with a sign that says butternut", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563076.jpg", "caption": "a right lane closed ahead sign on a white background", "annotations": [{"polygon": [[126, 226], [125, 258], [215, 260], [216, 227]], "text": "RIGHT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RIGHT", "recog_valid": true, "glyph_recog_text": "RIGHT", "glyph_recog_ld": 1.0}, {"polygon": [[251, 228], [252, 259], [336, 259], [333, 228]], "text": "LANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LANE", "recog_valid": true, "glyph_recog_text": "LANE", "glyph_recog_ld": 1.0}, {"polygon": [[163, 275], [162, 308], [296, 306], [294, 275]], "text": "CLOSED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CLOSED", "recog_valid": true, "glyph_recog_text": "CLOSED", "glyph_recog_ld": 1.0}, {"polygon": [[177, 323], [177, 323], [172, 358], [289, 357], [287, 324]], "text": "AHEAD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AHEAD", "recog_valid": true, "glyph_recog_text": "AHEAD", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137105.jpg", "caption": "a green and yellow train engine sitting in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071582.jpg", "caption": "a baseball glove and ball sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071589.jpg", "caption": "a parking meter on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235430.jpg", "caption": "a fighter jet sitting on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104380.jpg", "caption": "a delta airplane flying in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000300990.jpg", "caption": "a woman playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235468.jpg", "caption": "a man and a woman sitting in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268268.jpg", "caption": "a black and white photo of a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137202.jpg", "caption": "a woman holding a red umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000038905.jpg", "caption": "an old black and white photo of a bus on the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104443.jpg", "caption": "a truck with a large advertisement on the side of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000530433.jpg", "caption": "a double decker bus driving down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464902.jpg", "caption": "a blue basket with toothbrushes in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000464917.jpg", "caption": "a group of people standing in a line with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235568.jpg", "caption": "a bathroom with a sink, toilet and mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563250.jpg", "caption": "a small airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268341.jpg", "caption": "a volleyball net", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071735.jpg", "caption": "a young boy holding a baseball bat on a baseball field", "annotations": [{"polygon": [[236, 248], [243, 273], [256, 296], [272, 310], [286, 295], [286, 278], [259, 227]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TO", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333891.jpg", "caption": "a group of people flying kites in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000202829.jpg", "caption": "a display case with various pastries and doughnuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000333922.jpg", "caption": "a woman standing next to a motorcycle and a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563302.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000465008.jpg", "caption": "a large white airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563323.jpg", "caption": "a woman in a leopard print coat and boots skiing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000071818.jpg", "caption": "a white police truck parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104607.jpg", "caption": "a baseball player is at home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563380.jpg", "caption": "a red train car sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268472.jpg", "caption": "a baseball player on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334032.jpg", "caption": "people standing in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170193.jpg", "caption": "a boy eating a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432339.jpg", "caption": "a restaurant with a counter and a counter with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235741.jpg", "caption": "a train is pulling into a station with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268523.jpg", "caption": "a black and white photo of boats in a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000235760.jpg", "caption": "a man in a wheelchair holding a stuffed animal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399630.jpg", "caption": "a woman playing tennis", "annotations": [{"polygon": [[16, 243], [16, 246], [443, 287], [497, 266], [82, 225], [15, 243]], "text": "TORONTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OINO", "recog_valid": false, "glyph_recog_text": "T O R O N T O", "glyph_recog_ld": 0.23076982248475186}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366863.jpg", "caption": "a large plate of food with a glass of wine", "annotations": [{"polygon": [[64, 436], [64, 411], [163, 404], [168, 438]], "text": "Albani", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Albanw", "recog_valid": false, "glyph_recog_text": "Albani", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039201.jpg", "caption": "a woman standing next to a pile of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000465195.jpg", "caption": "a collage of pictures of a road with a sign that says frost heaves", "annotations": [{"polygon": [[190, 273], [338, 274], [335, 225], [187, 219]], "text": "FROST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FROST", "recog_valid": true, "glyph_recog_text": "FROST", "glyph_recog_ld": 1.0}, {"polygon": [[171, 280], [355, 280], [361, 328], [173, 337]], "text": "HEAVES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HEAVES", "recog_valid": true, "glyph_recog_text": "HEAVES", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000203054.jpg", "caption": "a brown leather bag, cell phone, sunglasses, and other items", "annotations": [{"polygon": [[355, 142], [298, 242], [265, 226], [326, 126]], "text": "RUSSH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "IHSSH", "recog_valid": false, "glyph_recog_text": "RUSSH", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170291.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301373.jpg", "caption": "a fire truck is parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563545.jpg", "caption": "a man in a hat talking to a woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000301405.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[155, 97], [399, 156], [460, 109], [460, 72], [214, 22]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[150, 255], [203, 272], [221, 240], [172, 224]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[123, 307], [250, 363], [264, 323], [139, 276]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[147, 335], [125, 376], [185, 398], [207, 360]], "text": "ANY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANY", "recog_valid": true, "glyph_recog_text": "ANY", "glyph_recog_ld": 1.0}, {"polygon": [[248, 264], [249, 285], [263, 292], [272, 296], [279, 292], [281, 283], [281, 273], [254, 264]], "text": "2007", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": false, "glyph_recog_text": "2007", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000104817.jpg", "caption": "a man holding two plates of food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000203137.jpg", "caption": "a large clock with a large face on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000366984.jpg", "caption": "a clock on a pole with a blue sky in the background", "annotations": [{"polygon": [[179, 191], [192, 184], [206, 179], [223, 176], [243, 178], [259, 181], [274, 188], [290, 200], [280, 209], [269, 200], [255, 195], [247, 192], [230, 191], [221, 191], [207, 194], [188, 204]], "text": "PINEHURST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HNEHURS", "recog_valid": false, "glyph_recog_text": "PINEHURST", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498079.jpg", "caption": "a woman riding a bicycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268718.jpg", "caption": "a cat sitting on a motorcycle", "annotations": [{"polygon": [[260, 441], [271, 416], [355, 425], [350, 446]], "text": "SYM", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SYM", "recog_valid": true, "glyph_recog_text": "SYM", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000006578.jpg", "caption": "a woman is brushing her hair in front of a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170432.jpg", "caption": "a man riding a motorcycle down a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563651.jpg", "caption": "a man laying on the ground with a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137681.jpg", "caption": "a man walking in the street next to a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432593.jpg", "caption": "a man in a yellow vest is standing next to two buses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072156.jpg", "caption": "a man riding a skateboard in a city", "annotations": [{"polygon": [[145, 183], [161, 249], [194, 257], [197, 252], [195, 225], [177, 171], [157, 174]], "text": "SA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CD", "recog_valid": false, "glyph_recog_text": "S A", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000203248.jpg", "caption": "a stuffed monkey sitting on a desk next to a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399868.jpg", "caption": "a baseball player in a baseball uniform", "annotations": [{"polygon": [[258, 250], [265, 245], [271, 242], [278, 238], [287, 238], [298, 236], [301, 251], [293, 250], [283, 251], [277, 254], [270, 258], [263, 267]], "text": "MARINER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MAIE", "recog_valid": false, "glyph_recog_text": "MAR有生用", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563711.jpg", "caption": "a street sign with a directional arrow pointing to a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399875.jpg", "caption": "a man is playing tennis on a court", "annotations": [{"polygon": [[379, 227], [366, 211], [346, 227], [357, 248]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236048.jpg", "caption": "a building with a clock on the top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000137748.jpg", "caption": "a car driving down a city street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039447.jpg", "caption": "a man standing on a tennis court with a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334362.jpg", "caption": "a woman playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334364.jpg", "caption": "a man is behind the counter at a fast food restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170525.jpg", "caption": "a man and woman in uniform cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000432673.jpg", "caption": "two cats on a table", "annotations": [{"polygon": [[2, 219], [41, 236], [36, 256], [25, 253], [21, 260], [1, 253]], "text": "M's", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ns", "recog_valid": false, "glyph_recog_text": "M's", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072237.jpg", "caption": "a pair of scissors and a measuring tape on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039475.jpg", "caption": "jeff gordon bvd sign", "annotations": [{"polygon": [[53, 260], [161, 261], [149, 302], [28, 304]], "text": "JEFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "JEFF", "recog_valid": true, "glyph_recog_text": "JEFF", "glyph_recog_ld": 1.0}, {"polygon": [[184, 259], [365, 261], [368, 303], [185, 302]], "text": "GORDON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GORDON", "recog_valid": true, "glyph_recog_text": "GORDON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000170562.jpg", "caption": "a man on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105030.jpg", "caption": "a cat sitting on a tv with a box in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072263.jpg", "caption": "a green double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039514.jpg", "caption": "a police vehicle parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072282.jpg", "caption": "a bus parked on a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105058.jpg", "caption": "a train traveling under a bridge with a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268899.jpg", "caption": "a person holding a cell phone with a key chain attached", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000268900.jpg", "caption": "a group of people sitting at a table with wine glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334441.jpg", "caption": "a man riding a motorcycle past a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531049.jpg", "caption": "cake girl by hong kong kim", "annotations": [{"polygon": [[276, 256], [267, 317], [289, 324], [316, 324], [338, 322], [349, 312], [355, 298], [333, 258]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "5", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000399983.jpg", "caption": "a man in white playing tennis on a tennis court", "annotations": [{"polygon": [[72, 181], [259, 181], [260, 68], [71, 71]], "text": "BAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BAS", "recog_valid": true, "glyph_recog_text": "BAS", "glyph_recog_ld": 1.0}, {"polygon": [[70, 249], [119, 247], [119, 278], [70, 279]], "text": "AS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "AS", "recog_valid": true, "glyph_recog_text": "AS", "glyph_recog_ld": 1.0}, {"polygon": [[377, 245], [441, 244], [441, 279], [377, 278]], "text": "BNI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BNI", "recog_valid": true, "glyph_recog_text": "BNI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000563827.jpg", "caption": "a group of old televisions sitting outside of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039555.jpg", "caption": "a long line of cars waiting to turn left", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334473.jpg", "caption": "a baseball game being played", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105100.jpg", "caption": "a display of old appliances in a museum", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105115.jpg", "caption": "a bus is parked on a street next to a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072354.jpg", "caption": "a crowded beach with many colorful umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072391.jpg", "caption": "a military jet sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367321.jpg", "caption": "a man taking a picture of himself with a camera", "annotations": [{"polygon": [[140, 488], [143, 465], [160, 470], [176, 475], [190, 484], [205, 487], [227, 489], [223, 510], [200, 506], [185, 500], [159, 493]], "text": "TAPES", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "239AT", "recog_valid": false, "glyph_recog_text": "TAPES", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[354, 431], [365, 478], [337, 484], [293, 485], [261, 485], [228, 485], [207, 482], [212, 434], [242, 441], [284, 444], [324, 444]], "text": "PDM", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MC9", "recog_valid": false, "glyph_recog_text": "PDM", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105200.jpg", "caption": "a group of people playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498425.jpg", "caption": "takeover of the capitol building, march 2011", "annotations": [{"polygon": [[149, 287], [312, 286], [312, 334], [148, 331]], "text": "TAKEOVR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TAKEOVR", "recog_valid": true, "glyph_recog_text": "TAKEOVR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000400133.jpg", "caption": "a toilet with a sign on it has texts", "annotations": [{"polygon": [[133, 206], [145, 238], [246, 189], [237, 164], [134, 206]], "text": "PERFECTLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PERFECTLY", "recog_valid": true, "glyph_recog_text": "PERFECTLY", "glyph_recog_ld": 1.0}, {"polygon": [[164, 249], [183, 288], [253, 239], [241, 211], [179, 241]], "text": "GOOD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Gooo", "recog_valid": false, "glyph_recog_text": "GOOD", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334603.jpg", "caption": "a small building with two toilets and a sign on the door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367375.jpg", "caption": "a horse pulling a cart", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105257.jpg", "caption": "a group of ducklings swimming in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236332.jpg", "caption": "a stop sign on a street corner with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138036.jpg", "caption": "a man riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000203577.jpg", "caption": "a white kitchen with a refrigerator, microwave and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564043.jpg", "caption": "a stop sign with a measuring tape on it", "annotations": [{"polygon": [[88, 199], [86, 328], [397, 332], [400, 204]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236386.jpg", "caption": "the contents of a backpack laid out on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367462.jpg", "caption": "a china airlines airplane on the tarmac at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531309.jpg", "caption": "an aerial view of airplanes parked in a desert", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531316.jpg", "caption": "a cake with a noah's ark on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433013.jpg", "caption": "a train pulling into a station with people standing on the platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072573.jpg", "caption": "a surfer riding a wave in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000465824.jpg", "caption": "a woman sitting on a pink scooter", "annotations": [{"polygon": [[174, 262], [189, 254], [200, 252], [218, 252], [205, 277], [194, 293], [179, 294], [174, 295], [166, 300], [164, 288]], "text": "JUICY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "多", "recog_valid": false, "glyph_recog_text": "AOInF", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000465825.jpg", "caption": "a desk with a computer, a laptop, and a monitor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000039847.jpg", "caption": "a busy street with many signs and people walking around", "annotations": [{"polygon": [[11, 322], [9, 349], [44, 355], [46, 330]], "text": "UPDATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "RMT", "recog_valid": false, "glyph_recog_text": "UPONTE", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[397, 291], [409, 305], [431, 259], [418, 246]], "text": "CocaCola", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BorGola", "recog_valid": false, "glyph_recog_text": "CocaCola", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498601.jpg", "caption": "a small white airplane on the runway in front of a green field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138166.jpg", "caption": "a sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236475.jpg", "caption": "a man and a woman on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334787.jpg", "caption": "a man flying a kite in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000072650.jpg", "caption": "a man standing on a beach holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433129.jpg", "caption": "a car wash sign with a sign that says southern pines car wash", "annotations": [{"polygon": [[239, 20], [239, 20], [275, 22], [312, 27], [347, 34], [348, 56], [322, 52], [299, 49], [277, 48], [259, 45], [241, 44]], "text": "TABERNACLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TABERNACLE", "recog_valid": true, "glyph_recog_text": "TABERNACLE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531444.jpg", "caption": "a pan with pizza rolls and dipping sauce", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564226.jpg", "caption": "a man is riding a paddle board through rapids", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000498722.jpg", "caption": "a sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433200.jpg", "caption": "a bicycle with a backpack on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138288.jpg", "caption": "a woman in red tights and black boots is looking at her cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138299.jpg", "caption": "a vintage illustration of two people playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105531.jpg", "caption": "a woman is making doughnuts in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000269372.jpg", "caption": "a street with many chinese signs and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000007228.jpg", "caption": "a group of people crossing the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000433225.jpg", "caption": "a stop sign sitting on a road next to a mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334950.jpg", "caption": "a vintage fire truck with people riding in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105576.jpg", "caption": "a table with vegetables and a bottle of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236650.jpg", "caption": "a group of young girls playing soccer on a field", "annotations": [{"polygon": [[259, 243], [302, 236], [308, 272], [269, 279]], "text": "23", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "23", "recog_valid": true, "glyph_recog_text": "23", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367724.jpg", "caption": "a small van with a number on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531575.jpg", "caption": "a fire hydrant sitting on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138385.jpg", "caption": "a person sitting at a table with a plate of eggs and bread", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000334995.jpg", "caption": "an open book with a clock and a watch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302236.jpg", "caption": "a man in a red and black motorcycle jacket sitting on a red motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138401.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[139, 220], [148, 208], [154, 210], [159, 209], [172, 225], [170, 231], [162, 238], [155, 237], [151, 235], [147, 229], [143, 225]], "text": "SD", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "SD", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000203937.jpg", "caption": "a laptop computer sitting on a table next to a mouse", "annotations": [{"polygon": [[330, 402], [326, 409], [291, 384], [296, 379]], "text": "Everyting", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Everything", "recog_valid": false, "glyph_recog_text": "by4-yet", "glyph_recog_ld": 0.1000008999991}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000367786.jpg", "caption": "a motorcycle seat bag is sitting on the ground", "annotations": [{"polygon": [[229, 425], [261, 394], [266, 403], [235, 435]], "text": "SPEED", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "RPED", "recog_valid": false, "glyph_recog_text": "SPEEO", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[262, 390], [291, 361], [295, 371], [270, 399]], "text": "TRIPLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VRPL", "recog_valid": false, "glyph_recog_text": "TRIPLE", "glyph_recog_ld": 0.5000008333319443}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335027.jpg", "caption": "a woman holding a surfboard on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138434.jpg", "caption": "a car parked at a traffic light under a cloudy sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138456.jpg", "caption": "three black bears walking in the woods", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236760.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138461.jpg", "caption": "a cell phone sitting on top of a speaker", "annotations": [{"polygon": [[16, 399], [16, 399], [17, 444], [48, 444], [66, 444], [112, 444], [111, 400], [61, 400]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}, {"polygon": [[213, 399], [213, 441], [261, 442], [297, 443], [323, 443], [347, 444], [395, 445], [437, 444], [477, 444], [498, 399], [461, 400], [428, 399], [404, 400], [374, 400], [343, 399]], "text": "DIGITALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DIGITALLY", "recog_valid": true, "glyph_recog_text": "DIGITALLY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000400606.jpg", "caption": "a table with a variety of tools on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466153.jpg", "caption": "a clock is mounted on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204039.jpg", "caption": "a teddy bear wearing a shirt that says slayer", "annotations": [{"polygon": [[225, 404], [274, 206], [308, 215], [271, 414]], "text": "SLAYER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0", "recog_valid": false, "glyph_recog_text": "SLAYER", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000531730.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[402, 98], [434, 145], [465, 143], [434, 96]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "", "recog_valid": false, "glyph_recog_text": "1 2", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564503.jpg", "caption": "a clock tower in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105761.jpg", "caption": "a train traveling down the tracks with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073016.jpg", "caption": "a carousel with elephants and horses", "annotations": [{"polygon": [[178, 165], [213, 143], [214, 151], [180, 175], [180, 175]], "text": "STARDUST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STAHH06T", "recog_valid": false, "glyph_recog_text": "530E00E", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499037.jpg", "caption": "a man skiing on a cross country course", "annotations": [{"polygon": [[136, 126], [376, 140], [385, 97], [141, 78]], "text": "START", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "START", "recog_valid": true, "glyph_recog_text": "START", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564589.jpg", "caption": "a group of people standing around a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000105853.jpg", "caption": "a man walking down the street in front of a corner bistro sign", "annotations": [{"polygon": [[0, 37], [124, 74], [123, 102], [0, 68]], "text": "CORNER", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ORNER", "recog_valid": false, "glyph_recog_text": "CORNER", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[-1, 80], [113, 110], [113, 139], [0, 112]], "text": "BISTRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "BISTRO", "recog_valid": true, "glyph_recog_text": "BISTRO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138670.jpg", "caption": "a group of kids playing soccer on a field", "annotations": [{"polygon": [[291, 253], [271, 285], [285, 290], [305, 256]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000236990.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335316.jpg", "caption": "a piece of cake on a plate", "annotations": [{"polygon": [[219, 229], [244, 218], [247, 221], [252, 218], [251, 216], [260, 210], [261, 212], [264, 211], [264, 210], [273, 203], [273, 206], [277, 205], [275, 195], [270, 198], [267, 196], [216, 222]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Photography", "recog_valid": true, "glyph_recog_text": "Phokography", "glyph_recog_ld": 0.909090991735462}, {"polygon": [[324, 256], [327, 267], [385, 243], [382, 230]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Photography", "recog_valid": false, "glyph_recog_text": "photography", "glyph_recog_ld": 0.909090991735462}, {"polygon": [[135, 307], [138, 319], [200, 292], [193, 278]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Photography", "recog_valid": false, "glyph_recog_text": "ohotography", "glyph_recog_ld": 0.909090991735462}, {"polygon": [[358, 59], [361, 69], [414, 45], [412, 34]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Pholograbl", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[107, 183], [111, 195], [168, 169], [165, 155]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Photography", "recog_valid": false, "glyph_recog_text": "ahctography", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335323.jpg", "caption": "a fighter jet flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368093.jpg", "caption": "photo of jason kipnis at spring training", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466406.jpg", "caption": "a white computer keyboard and mouse sitting on top of a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171499.jpg", "caption": "an american airlines jetliner flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335362.jpg", "caption": "a sandwich and a glass of beer on a table", "annotations": [{"polygon": [[272, 216], [335, 191], [336, 211], [320, 222], [290, 234], [273, 235]], "text": "GUNNE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GUINN", "recog_valid": false, "glyph_recog_text": "GUNNE", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302595.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073234.jpg", "caption": "a bicycle is parked next to a train", "annotations": [{"polygon": [[92, 206], [92, 206], [94, 254], [228, 253], [247, 203]], "text": "FART", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "7A7T", "recog_valid": false, "glyph_recog_text": "FART", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106003.jpg", "caption": "a suitcase sitting on a dock near a canal", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000138772.jpg", "caption": "a woman holding up three cell phones", "annotations": [{"polygon": [[177, 66], [185, 98], [205, 94], [230, 94], [240, 95], [261, 99], [287, 109], [301, 90], [298, 86], [283, 77], [264, 72], [239, 64], [224, 61], [200, 62]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "HAPRY", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[149, 114], [169, 101], [174, 98], [185, 95], [214, 93], [227, 94], [225, 130], [217, 130], [205, 130], [186, 134], [168, 144]], "text": "NEW", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "NEW", "recog_valid": true, "glyph_recog_text": "NEW", "glyph_recog_ld": 1.0}, {"polygon": [[231, 94], [231, 94], [236, 129], [269, 137], [291, 148], [303, 159], [327, 136], [314, 121], [298, 113], [276, 104], [258, 98]], "text": "YEAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "YEAR", "recog_valid": true, "glyph_recog_text": "YEAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106010.jpg", "caption": "a man and a child on skis standing in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466511.jpg", "caption": "an old street with a sign that says antique", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000302678.jpg", "caption": "a cup of coffee and a donut on a table", "annotations": [{"polygon": [[121, 417], [141, 406], [148, 337], [145, 320], [128, 325], [122, 337], [112, 365], [112, 395]], "text": "TOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RoL", "recog_valid": false, "glyph_recog_text": "-oa", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[138, 437], [151, 451], [170, 465], [190, 467], [193, 456], [148, 428]], "text": "HAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HAND", "recog_valid": true, "glyph_recog_text": "HAND", "glyph_recog_ld": 1.0}, {"polygon": [[197, 456], [199, 471], [219, 471], [238, 467], [254, 459], [264, 452], [274, 445], [268, 433], [257, 440], [250, 445], [239, 450], [215, 456]], "text": "FORGED", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FORGED", "recog_valid": true, "glyph_recog_text": "FORGED", "glyph_recog_ld": 1.0}, {"polygon": [[271, 427], [280, 434], [295, 410], [300, 391], [303, 367], [300, 346], [294, 328], [287, 315], [280, 311], [272, 312], [268, 319], [271, 330], [277, 336], [278, 342], [283, 356], [285, 361], [285, 371], [285, 378], [285, 384], [286, 391], [286, 393], [284, 407], [278, 415]], "text": "DOUGHNUTS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SIONHDOOO", "recog_valid": false, "glyph_recog_text": "oo=o+z=", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[172, 320], [185, 311], [198, 307], [216, 306], [224, 307], [237, 312], [246, 312], [255, 289], [232, 282], [214, 282], [202, 285], [177, 290], [165, 293], [159, 297]], "text": "POT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PUT", "recog_valid": false, "glyph_recog_text": "POT", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171607.jpg", "caption": "a woman playing tennis on a court", "annotations": [{"polygon": [[170, 99], [163, 116], [170, 117], [169, 130], [178, 133], [189, 122], [196, 127], [201, 124], [228, 129], [231, 118], [222, 113], [197, 107], [203, 96], [199, 93], [190, 106], [172, 102]], "text": "DesJoyaux", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Deyloyaur", "recog_valid": false, "glyph_recog_text": "Des.icyaux", "glyph_recog_ld": 0.5000004999995}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000564837.jpg", "caption": "a bottle of vodka on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466536.jpg", "caption": "a street with a clock tower in the middle of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335500.jpg", "caption": "two women eating at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073367.jpg", "caption": "a street sign with a woman walking down the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237210.jpg", "caption": "a busy street with cars parked on both sides of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532132.jpg", "caption": "a man is taking a picture of a display of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368293.jpg", "caption": "a large bulldozer sitting on top of a dirt road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466627.jpg", "caption": "a cookie and ice cream sandwich on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000466664.jpg", "caption": "a bag filled with items including a cell phone, a wallet, a hat, a bag, a pair of sunglasses, a pair of shoes, a pair of", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499446.jpg", "caption": "a no parking sign with an arrow pointing to the right", "annotations": [{"polygon": [[165, 85], [165, 142], [205, 137], [206, 78]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "会", "recog_valid": false, "glyph_recog_text": "zo", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[213, 88], [215, 125], [327, 106], [325, 67]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[197, 233], [196, 175], [301, 162], [301, 221]], "text": "ANY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANY", "recog_valid": true, "glyph_recog_text": "ANY", "glyph_recog_ld": 1.0}, {"polygon": [[190, 244], [191, 302], [308, 292], [308, 233]], "text": "TIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TIME", "recog_valid": true, "glyph_recog_text": "TIME", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204537.jpg", "caption": "a man throwing a baseball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237327.jpg", "caption": "a man on a motorcycle next to a no parking sign", "annotations": [{"polygon": [[260, 42], [259, 83], [293, 82], [296, 39]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "R", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368409.jpg", "caption": "a woman holding an umbrella walks down a street in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532286.jpg", "caption": "a group of men sitting at a table eating food", "annotations": [{"polygon": [[437, 319], [459, 318], [495, 400], [490, 407], [471, 407]], "text": "WES", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "E", "recog_valid": false, "glyph_recog_text": "WES", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139095.jpg", "caption": "a group of men playing basketball in a gym", "annotations": [{"polygon": [[262, 245], [268, 239], [296, 248], [288, 278], [268, 275]], "text": "10", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "10", "recog_valid": true, "glyph_recog_text": "9", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368479.jpg", "caption": "a large airplane parked at an airport with a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335717.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073610.jpg", "caption": "a man standing in front of a mirror with a dog", "annotations": [{"polygon": [[335, 258], [336, 283], [354, 278], [372, 277], [367, 288], [373, 291], [382, 277], [397, 277], [399, 262], [379, 263], [353, 260], [340, 257]], "text": "Chicago", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Hieage", "recog_valid": false, "glyph_recog_text": "Chicago", "glyph_recog_ld": 0.428572244896793}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434070.jpg", "caption": "a woman cutting a cake on a table", "annotations": [{"polygon": [[264, 212], [264, 233], [319, 244], [325, 230]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Birthda", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401310.jpg", "caption": "a girl in a grey sweatshirt", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270248.jpg", "caption": "a baseball player swinging a bat on a field", "annotations": [{"polygon": [[339, 230], [327, 281], [291, 272], [301, 211]], "text": "GIBSON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WOSE19", "recog_valid": false, "glyph_recog_text": "NOSBIS", "glyph_recog_ld": 0.3333344444425925}, {"polygon": [[343, 230], [328, 297], [401, 306], [407, 292]], "text": "85", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "85", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204714.jpg", "caption": "a large entertainment center with a television and cabinets", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499627.jpg", "caption": "a clock and a plant on a window sill", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106430.jpg", "caption": "a large jet airplane sitting on the tarmac", "annotations": [{"polygon": [[159, 247], [152, 286], [254, 286], [260, 237]], "text": "DHL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "R", "recog_valid": false, "glyph_recog_text": "DHL", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139211.jpg", "caption": "a man eating a donut in a market", "annotations": [{"polygon": [[114, 77], [117, 51], [173, 70], [168, 93]], "text": "Lee's", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Lees", "recog_valid": false, "glyph_recog_text": "Lee's", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[109, 74], [105, 101], [221, 141], [223, 116]], "text": "Donuts", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "Donuits", "recog_valid": false, "glyph_recog_text": "Donuts", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[107, 113], [106, 138], [163, 154], [166, 134]], "text": "Donuts .", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Donuts", "recog_valid": false, "glyph_recog_text": "Donuts.", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[178, 135], [176, 165], [221, 177], [222, 155]], "text": "Crepes", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Crepes", "recog_valid": true, "glyph_recog_text": "Crepes", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368588.jpg", "caption": "a pizza and a beer on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139214.jpg", "caption": "a car driving down a street with a green light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270290.jpg", "caption": "pumpkin bread with chocolate icing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139216.jpg", "caption": "a stuffed animal and a bottle of soda", "annotations": [{"polygon": [[364, 374], [372, 354], [390, 334], [405, 331], [423, 332], [434, 335], [420, 354], [409, 360], [388, 361], [381, 364]], "text": "Cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "G", "recog_valid": false, "glyph_recog_text": "Cola", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000171997.jpg", "caption": "a group of kids playing baseball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499725.jpg", "caption": "a red motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335902.jpg", "caption": "a white and blue rv parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000335914.jpg", "caption": "a table with scissors and other items on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237611.jpg", "caption": "two bicycles are parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073783.jpg", "caption": "a man in black shirt and black shorts playing tennis", "annotations": [{"polygon": [[1, 165], [141, 168], [142, 211], [1, 210]], "text": "SAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SAN", "recog_valid": true, "glyph_recog_text": "SAN", "glyph_recog_ld": 1.0}, {"polygon": [[366, 132], [364, 157], [498, 164], [501, 138], [366, 131]], "text": "RICOH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RICOH", "recog_valid": true, "glyph_recog_text": "RICOH", "glyph_recog_ld": 1.0}, {"polygon": [[367, 196], [416, 198], [402, 230], [362, 227], [365, 197]], "text": "97", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "97", "recog_valid": true, "glyph_recog_text": "97", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368702.jpg", "caption": "a dog is standing in front of a train car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073790.jpg", "caption": "a street sign with stickers on it", "annotations": [{"polygon": [[126, 266], [154, 287], [141, 296], [117, 273]], "text": "not", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "not", "recog_valid": true, "glyph_recog_text": "not", "glyph_recog_ld": 1.0}, {"polygon": [[211, 340], [201, 353], [235, 383], [241, 374]], "text": "love", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "love", "recog_valid": true, "glyph_recog_text": "love", "glyph_recog_ld": 1.0}, {"polygon": [[159, 291], [148, 302], [176, 327], [185, 316]], "text": "fall", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "fall", "recog_valid": true, "glyph_recog_text": "fall", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467027.jpg", "caption": "a man riding a horse in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073814.jpg", "caption": "a baseball player sliding into home plate", "annotations": [{"polygon": [[239, 269], [260, 286], [280, 256], [249, 235]], "text": "24", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "24", "recog_valid": true, "glyph_recog_text": "24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000204889.jpg", "caption": "a dog is walking on the street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041074.jpg", "caption": "a stuffed animal with a book", "annotations": [{"polygon": [[214, 253], [223, 280], [341, 241], [333, 213]], "text": "ELDEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ELPEST", "recog_valid": false, "glyph_recog_text": "ELDEST", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[269, 479], [263, 509], [357, 510], [358, 487]], "text": "Alexandra", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Clexandia", "recog_valid": false, "glyph_recog_text": "Alexandra", "glyph_recog_ld": 0.7777780246910837}, {"polygon": [[384, 480], [383, 512], [443, 509], [442, 481]], "text": "Abreu", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Cbhia", "recog_valid": false, "glyph_recog_text": "Abreu", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172151.jpg", "caption": "a no exit sign on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000172160.jpg", "caption": "a man standing next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467076.jpg", "caption": "a car is driving down a street with a white arrow pointing to the right", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000237701.jpg", "caption": "a man holding an umbrella in front of a store", "annotations": [{"polygon": [[256, 76], [504, 122], [503, 190], [260, 180]], "text": "bugabo", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "bugaba", "recog_valid": false, "glyph_recog_text": "bugabo", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139400.jpg", "caption": "a large clock tower with two clocks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008339.jpg", "caption": "a group of police officers on motorcycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073898.jpg", "caption": "many boats are docked at a marina", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073904.jpg", "caption": "a plate with a hot dog and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008379.jpg", "caption": "a sign that says parking on a pole", "annotations": [{"polygon": [[186, 186], [184, 221], [281, 241], [282, 209]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000368827.jpg", "caption": "a box of donuts", "annotations": [{"polygon": [[365, 394], [321, 413], [302, 401], [345, 378]], "text": "hill", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "11", "recog_valid": false, "glyph_recog_text": "hill", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[302, 448], [373, 416], [392, 426], [341, 448]], "text": "one", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "OUNEP.", "recog_valid": false, "glyph_recog_text": "小n分", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000073941.jpg", "caption": "a skier is jumping over a ramp in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000499958.jpg", "caption": "a white mouse and keyboard sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532734.jpg", "caption": "two men on a podium with one man waving", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000401670.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[186, 151], [214, 170], [200, 213], [182, 198]], "text": "W", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "多", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[232, 184], [264, 204], [256, 247], [222, 224]], "text": "31", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "品", "recog_valid": false, "glyph_recog_text": "LE", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[201, 266], [243, 246], [266, 264], [263, 274], [194, 305]], "text": "DYER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DYEN", "recog_valid": false, "glyph_recog_text": "DYER", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303376.jpg", "caption": "a vintage photo of two men standing next to an airplane", "annotations": [{"polygon": [[134, 274], [222, 245], [227, 264], [140, 292]], "text": "VH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VHHOV", "recog_valid": false, "glyph_recog_text": "VH", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106792.jpg", "caption": "a man wearing a hat", "annotations": [{"polygon": [[218, 339], [215, 348], [212, 360], [219, 363], [232, 363], [249, 359], [268, 355], [291, 349], [302, 346], [296, 323]], "text": "Canad", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Canad", "recog_valid": true, "glyph_recog_text": "Canad", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532790.jpg", "caption": "an osprey takes off from the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467256.jpg", "caption": "a woman in a red coat standing in front of a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270651.jpg", "caption": "ingredients for a salad with vegetables and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270659.jpg", "caption": "a person riding skis down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139612.jpg", "caption": "a truck with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270696.jpg", "caption": "a man in a plaid shirt and hat standing next to another man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205159.jpg", "caption": "woman in a blue dress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074100.jpg", "caption": "a bus driving down a snowy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074114.jpg", "caption": "a surfboard on the beach", "annotations": [{"polygon": [[311, 250], [315, 267], [325, 262], [333, 262], [339, 264], [348, 271], [349, 270], [352, 243], [344, 241], [334, 240], [323, 243]], "text": "LESSONS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ve", "recog_valid": false, "glyph_recog_text": "大", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270740.jpg", "caption": "a baseball player swinging at a pitch during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369049.jpg", "caption": "a black and white photo of a union lawn shop", "annotations": [{"polygon": [[226, 149], [225, 182], [365, 186], [366, 149]], "text": "UNI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNION", "recog_valid": false, "glyph_recog_text": "UNI", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[150, 223], [183, 218], [238, 216], [238, 248], [218, 251], [197, 249], [148, 252]], "text": "PAWN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AWN", "recog_valid": false, "glyph_recog_text": "PAWN", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[301, 217], [300, 248], [462, 256], [464, 224]], "text": "SHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHOP", "recog_valid": true, "glyph_recog_text": "SHOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434587.jpg", "caption": "a red bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139696.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238001.jpg", "caption": "a group of people standing in a line with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270784.jpg", "caption": "a train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565699.jpg", "caption": "a bus is parked at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106973.jpg", "caption": "a green and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000532958.jpg", "caption": "a kitchen with a refrigerator, stove and a microwave", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369125.jpg", "caption": "a remote control sitting on a desk next to a keyboard", "annotations": [{"polygon": [[90, 311], [103, 306], [125, 335], [111, 340]], "text": "Firefly", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "firefly", "recog_valid": false, "glyph_recog_text": "Fieefly", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074213.jpg", "caption": "a row of pink and white scooters are lined up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139757.jpg", "caption": "a dog laying on the ground in front of a purple door", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000106994.jpg", "caption": "a woman riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205317.jpg", "caption": "a man riding a surfboard in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107052.jpg", "caption": "three baseball players standing on a baseball field", "annotations": [{"polygon": [[211, 225], [206, 230], [206, 235], [206, 249], [207, 254], [213, 258], [220, 258], [227, 254], [231, 258], [241, 258], [245, 252], [246, 230], [242, 227]], "text": "39", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "39", "recog_valid": true, "glyph_recog_text": "39", "glyph_recog_ld": 1.0}, {"polygon": [[165, 326], [163, 356], [168, 361], [183, 363], [208, 367], [210, 355], [213, 335], [210, 331]], "text": "52", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "52", "recog_valid": true, "glyph_recog_text": "52", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303668.jpg", "caption": "bananas on a stand", "annotations": [{"polygon": [[102, 120], [164, 76], [214, 55], [240, 44], [275, 40], [304, 43], [329, 37], [382, 47], [379, 93], [368, 105], [351, 101], [305, 97], [238, 106], [211, 118], [181, 137], [145, 170]], "text": "BANANAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BANAMIS", "recog_valid": false, "glyph_recog_text": "BANANAS", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[273, 247], [290, 243], [321, 229], [328, 237], [302, 254], [278, 260]], "text": "unDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UNDER", "recog_valid": false, "glyph_recog_text": "unDER", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[177, 215], [223, 250], [213, 257], [178, 230]], "text": "WOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WON", "recog_valid": false, "glyph_recog_text": "wow", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[162, 157], [225, 247], [267, 240], [321, 226], [340, 160], [311, 99]], "text": "$19", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5)9", "recog_valid": false, "glyph_recog_text": "$19", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[386, 240], [386, 276], [436, 267], [437, 238]], "text": "Corn", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Com", "recog_valid": false, "glyph_recog_text": "Corn", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336445.jpg", "caption": "a street sign that is leaning on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041549.jpg", "caption": "a man with glasses and earphones is looking at his phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000434765.jpg", "caption": "a man with a cart and a dog on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565849.jpg", "caption": "a man in a suit and tie is holding a flower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041568.jpg", "caption": "a man and a dog sitting on a cobblestone street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369256.jpg", "caption": "a blue train traveling down a track next to a mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000270959.jpg", "caption": "a street sign with a bus stop sign and a parking sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565886.jpg", "caption": "a living room with a fireplace, couches and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000008848.jpg", "caption": "a bed with a large comforter and pillows", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041616.jpg", "caption": "a man is holding a pink frisbee in his mouth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238231.jpg", "caption": "a man riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000074398.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205477.jpg", "caption": "a kitchen counter with a blender, a blender bottle, and a box of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271038.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205520.jpg", "caption": "a stop sign and a sign that says tony s and s", "annotations": [{"polygon": [[344, 286], [465, 268], [471, 315], [349, 333]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[155, 102], [149, 120], [191, 129], [223, 146], [230, 136], [204, 115]], "text": "SAM'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SAM'S", "recog_valid": true, "glyph_recog_text": "SAM'S", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000139987.jpg", "caption": "a person hanging upside down from a street sign", "annotations": [{"polygon": [[149, 49], [220, 70], [220, 70], [229, 92], [152, 68], [147, 60]], "text": "OBAMA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OBAMA", "recog_valid": true, "glyph_recog_text": "OBAMA", "glyph_recog_ld": 1.0}, {"polygon": [[300, 93], [300, 93], [378, 45], [382, 49], [386, 60], [378, 67], [378, 67], [301, 113]], "text": "PRESIDIO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "PRESIDIO", "recog_valid": true, "glyph_recog_text": "PRESIDIO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000565979.jpg", "caption": "a large clock on a building with a sign has texts", "annotations": [{"polygon": [[149, 402], [151, 417], [282, 390], [280, 373]], "text": "Who's", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "少少-卜夕二之夕", "recog_valid": false, "glyph_recog_text": "W ho's", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[179, 336], [178, 379], [294, 353], [292, 312]], "text": "PASELA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PHSELA", "recog_valid": false, "glyph_recog_text": "PASELA", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369380.jpg", "caption": "an elephant standing in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402174.jpg", "caption": "a large jet airplane sitting on top of an airport tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041730.jpg", "caption": "a lunch box with rice, broccoli, and orange", "annotations": [{"polygon": [[126, 102], [127, 117], [141, 115], [151, 111], [156, 107], [164, 103], [170, 98], [176, 92], [182, 80], [178, 69], [174, 73], [172, 80], [162, 92], [156, 95], [148, 99]], "text": "ACTIVIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ACTIVIA", "recog_valid": true, "glyph_recog_text": "ACTIVIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271136.jpg", "caption": "a man in blue shirt and tan pants playing tennis", "annotations": [{"polygon": [[213, 337], [213, 337], [197, 380], [314, 385], [315, 338]], "text": "ASG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASS", "recog_valid": false, "glyph_recog_text": "ASG", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566085.jpg", "caption": "a man holding an umbrella in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041808.jpg", "caption": "a large airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205650.jpg", "caption": "a man in a suit standing on the side of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435026.jpg", "caption": "a brown teddy bear with a scarf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500576.jpg", "caption": "a woman is walking a horse in an arena", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369512.jpg", "caption": "people are working on a large rock in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533356.jpg", "caption": "a woman standing on a street corner in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238448.jpg", "caption": "a large clock with a blue and gold face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238455.jpg", "caption": "a clock on a building with a statue of an eagle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303992.jpg", "caption": "photo '2012, sf giants vs yankees, sf bay bridge, sf, california, united states' - license http //creat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205689.jpg", "caption": "a person's hand is pointing at a refrigerator", "annotations": [{"polygon": [[203, 182], [299, 202], [294, 234], [203, 206]], "text": "word", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WOYD", "recog_valid": false, "glyph_recog_text": "word", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000303991.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000304011.jpg", "caption": "a double decker bus parked next to a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369567.jpg", "caption": "a woman in a green sweater and scarf is looking at the camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041913.jpg", "caption": "bananas for sale at the market", "annotations": [{"polygon": [[404, 65], [404, 65], [413, 64], [429, 65], [444, 65], [453, 65], [469, 66], [475, 75], [470, 105], [447, 100], [434, 100], [416, 105], [405, 104], [401, 97], [401, 97]], "text": "3000", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "3000", "recog_valid": true, "glyph_recog_text": "3000", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000205756.jpg", "caption": "a young girl and boy sitting at a table in a tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238535.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435145.jpg", "caption": "a street with a street light and a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402386.jpg", "caption": "a blue street sign", "annotations": [{"polygon": [[215, 244], [211, 280], [375, 307], [378, 270], [214, 241]], "text": "SHORTLAND", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHORTLAND", "recog_valid": true, "glyph_recog_text": "SHORTLAND", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000467925.jpg", "caption": "a bus driving down a street in the rain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000336887.jpg", "caption": "a blue and yellow building with two stuffed animals on the beach", "annotations": [{"polygon": [[207, 333], [209, 356], [248, 345], [242, 326], [218, 328]], "text": "Thank", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Thank", "recog_valid": true, "glyph_recog_text": "Thank", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271363.jpg", "caption": "a group of rowers in a boat on the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000041995.jpg", "caption": "a pizza shaped clock", "annotations": [{"polygon": [[252, 144], [251, 171], [281, 168], [278, 141]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "12", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238618.jpg", "caption": "a black and white photo of a cow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107617.jpg", "caption": "a green and white train on tracks near a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369762.jpg", "caption": "a white and yellow train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566368.jpg", "caption": "a set of remote controls sitting on top of a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173163.jpg", "caption": "a train is parked at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566385.jpg", "caption": "a man sitting behind a counter with a variety of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107638.jpg", "caption": "a man doing a trick on a skateboard in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468100.jpg", "caption": "a cow laying down on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402569.jpg", "caption": "a tray of food on a table", "annotations": [{"polygon": [[166, 139], [211, 108], [221, 130], [175, 160]], "text": "RB", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "RB", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[406, 259], [390, 282], [415, 314], [434, 316], [445, 286], [435, 265]], "text": "Miss", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "送", "recog_valid": false, "glyph_recog_text": "Miss", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271505.jpg", "caption": "a row of bicycles parked on the side of the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500900.jpg", "caption": "a group of people standing around a concrete hole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533684.jpg", "caption": "a woman is lassoing a horse in an arena", "annotations": [{"polygon": [[138, 163], [192, 209], [201, 204], [185, 181], [148, 151], [141, 153]], "text": "Heather", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Heather", "recog_valid": true, "glyph_recog_text": "Heather", "glyph_recog_ld": 1.0}, {"polygon": [[197, 213], [209, 228], [270, 277], [284, 277], [269, 255], [236, 234], [218, 211], [211, 206], [203, 207]], "text": "Abounader", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Abounader", "recog_valid": true, "glyph_recog_text": "Abounader", "glyph_recog_ld": 1.0}, {"polygon": [[280, 287], [362, 365], [378, 356], [295, 276], [290, 276]], "text": "photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Photography", "recog_valid": false, "glyph_recog_text": "photography", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000009398.jpg", "caption": "a table with a bunch of bananas and books", "annotations": [{"polygon": [[181, 305], [185, 291], [232, 395], [226, 405]], "text": "PROJECT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PROIECT", "recog_valid": false, "glyph_recog_text": "PRnJeoY", "glyph_recog_ld": 0.28571530612099116}, {"polygon": [[195, 272], [202, 259], [239, 331], [230, 342]], "text": "STEEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STA", "recog_valid": false, "glyph_recog_text": "STEEL", "glyph_recog_ld": 0.4000011999976}, {"polygon": [[19, 286], [40, 286], [95, 267], [117, 259], [117, 259], [121, 240], [64, 258], [20, 279]], "text": "teelbanana", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "0091031", "recog_valid": false, "glyph_recog_text": "teelbanans", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[1, 144], [11, 154], [36, 168], [86, 181], [110, 185], [101, 171], [64, 168], [24, 149], [0, 130]], "text": "steelbananas.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Teeipananas.con", "recog_valid": false, "glyph_recog_text": "steelbananas.com", "glyph_recog_ld": 0.6875001953123779}, {"polygon": [[31, 142], [33, 148], [42, 153], [115, 146], [167, 132], [191, 126], [187, 118], [153, 128], [107, 137]], "text": "bananas", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "nanaeelban ahs-(", "recog_valid": false, "glyph_recog_text": "bananas", "glyph_recog_ld": 0.3750003906247559}, {"polygon": [[332, 91], [354, 104], [370, 130], [380, 137], [379, 129], [379, 129], [366, 106], [347, 91], [336, 89]], "text": "steel", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "719", "recog_valid": false, "glyph_recog_text": "steal", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[238, 126], [283, 163], [293, 161], [301, 167], [302, 163], [246, 118]], "text": "bananas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "vrpoto9", "recog_valid": false, "glyph_recog_text": "bananas", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[287, 122], [322, 150], [336, 170], [346, 199], [346, 199], [354, 201], [346, 172], [341, 160], [329, 144], [309, 127], [290, 116]], "text": "www.steelbananas.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "o) p700917215200200", "recog_valid": false, "glyph_recog_text": "wwe seeibanarae.com", "glyph_recog_ld": 5.263155123946817e-07}, {"polygon": [[375, 438], [386, 437], [446, 360], [436, 358]], "text": "18.95", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "LE", "recog_valid": false, "glyph_recog_text": "13品", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[386, 346], [386, 346], [333, 427], [347, 427], [403, 344]], "text": "18.95", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N-E5", "recog_valid": false, "glyph_recog_text": "¥8.95", "glyph_recog_ld": 0.2000015999967999}, {"polygon": [[361, 347], [310, 431], [326, 428], [377, 345]], "text": "GULCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "", "recog_valid": false, "glyph_recog_text": "SULGK", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[288, 338], [255, 424], [270, 423], [309, 336]], "text": "GULCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SRFCO", "recog_valid": false, "glyph_recog_text": "GULCH", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206013.jpg", "caption": "a plate with a sandwich and a bottle of jelly", "annotations": [{"polygon": [[168, 161], [188, 148], [212, 179], [188, 191]], "text": "OP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "oP", "recog_valid": false, "glyph_recog_text": "OP", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[143, 177], [165, 161], [196, 198], [175, 216]], "text": "DCKS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DCKS", "recog_valid": true, "glyph_recog_text": "DCKS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369866.jpg", "caption": "a woman with a pink umbrella walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000500954.jpg", "caption": "a person riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435433.jpg", "caption": "two black dogs are hugging each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435435.jpg", "caption": "two street signs on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566512.jpg", "caption": "a person riding a skateboard in a pool", "annotations": [{"polygon": [[241, 136], [276, 125], [284, 143], [249, 155]], "text": "etnies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "etnies", "recog_valid": true, "glyph_recog_text": "etrie s", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000369920.jpg", "caption": "a coca cola bus", "annotations": [{"polygon": [[369, 106], [505, 115], [505, 196], [369, 184]], "text": "Coca.Col", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Tecali", "recog_valid": false, "glyph_recog_text": "Coca.Col", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402689.jpg", "caption": "a woman and two children standing outside a cartier store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206083.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271638.jpg", "caption": "a group of people sitting under tents at a skate park", "annotations": [{"polygon": [[355, 198], [358, 225], [357, 227], [418, 221], [416, 190]], "text": "VANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "VANS", "recog_valid": true, "glyph_recog_text": "VANS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206109.jpg", "caption": "a black bird eating an apple", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140575.jpg", "caption": "a baseball player running to first base", "annotations": [{"polygon": [[475, 214], [474, 247], [474, 247], [511, 247], [510, 213], [510, 213]], "text": "MA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "MA", "recog_valid": true, "glyph_recog_text": "MA", "glyph_recog_ld": 1.0}, {"polygon": [[6, 140], [6, 140], [8, 185], [73, 184], [82, 140]], "text": "47", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "47", "recog_valid": true, "glyph_recog_text": "47", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000238881.jpg", "caption": "a table with a plate of food and a bowl of oranges", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173365.jpg", "caption": "a cat sitting on top of a keyboard", "annotations": [{"polygon": [[435, 204], [435, 204], [427, 231], [467, 252], [471, 241], [492, 256], [506, 218], [502, 214], [491, 222], [469, 219], [447, 213], [441, 202]], "text": "Royal", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Rogal", "recog_valid": false, "glyph_recog_text": "Roya", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[401, 166], [395, 196], [402, 204], [409, 197], [414, 202], [469, 206], [476, 176]], "text": "grand", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Grand", "recog_valid": false, "glyph_recog_text": "grand", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000107846.jpg", "caption": "three men sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435543.jpg", "caption": "a pair of scissors and a bag of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206170.jpg", "caption": "a yellow room with a red table and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042339.jpg", "caption": "a group of people on skis standing on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140648.jpg", "caption": "a man in a suit standing on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042345.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[114, 188], [227, 207], [242, 228], [242, 233], [113, 215]], "text": "PEARL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PEARL", "recog_valid": true, "glyph_recog_text": "PEARL", "glyph_recog_ld": 1.0}, {"polygon": [[120, 234], [175, 243], [170, 292], [121, 282]], "text": "NW", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NW", "recog_valid": true, "glyph_recog_text": "NW", "glyph_recog_ld": 1.0}, {"polygon": [[193, 243], [367, 271], [373, 275], [374, 284], [374, 316], [370, 322], [363, 322], [193, 294]], "text": "FLANDERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FLANDERS", "recog_valid": true, "glyph_recog_text": "FLANDERS", "glyph_recog_ld": 1.0}, {"polygon": [[398, 287], [422, 292], [418, 316], [413, 317], [398, 314], [392, 308], [393, 292]], "text": "ST", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "ST", "recog_valid": true, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000533864.jpg", "caption": "a stop sign with a wedding sign on it", "annotations": [{"polygon": [[54, 374], [55, 406], [239, 404], [239, 360]], "text": "WEDDING", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WEODNG", "recog_valid": false, "glyph_recog_text": "WEDDING", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[257, 164], [253, 228], [410, 228], [410, 168]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435575.jpg", "caption": "a baseball game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566670.jpg", "caption": "four pictures of a street sign, a window and a street sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239000.jpg", "caption": "a motorcycle with a helmet and luggage on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140733.jpg", "caption": "four blue angels jets flying in formation", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370124.jpg", "caption": "a black and red steam engine train parked on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402902.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042481.jpg", "caption": "a crowd of people standing behind a fence", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140787.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000402941.jpg", "caption": "firefighters standing near a fire truck on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042516.jpg", "caption": "a person walking down a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173611.jpg", "caption": "a dog wearing sunglasses and a bandana on a motorcycle", "annotations": [{"polygon": [[142, 262], [135, 288], [183, 306], [192, 288]], "text": "HARLEY-", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARLEY", "recog_valid": false, "glyph_recog_text": "HARLEY.", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[160, 304], [154, 318], [187, 346], [233, 330], [224, 315], [191, 318]], "text": "CYCLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YE", "recog_valid": false, "glyph_recog_text": "CYCLES", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075311.jpg", "caption": "a living room with a tv, a bookcase, and a plant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075359.jpg", "caption": "a chair made out of skis sitting on the grass", "annotations": [{"polygon": [[302, 171], [278, 258], [285, 268], [308, 177]], "text": "ROSSIGNOL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROSSIGNOL", "recog_valid": true, "glyph_recog_text": "904913601", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[112, 350], [141, 382], [149, 377], [120, 346]], "text": "FISCHER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FISCHER", "recog_valid": true, "glyph_recog_text": "8", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566893.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140909.jpg", "caption": "a group of boats are docked in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000271996.jpg", "caption": "a vintage truck parked in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566911.jpg", "caption": "a man sitting at a table with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435839.jpg", "caption": "a truck driving down a road with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000435882.jpg", "caption": "a computer keyboard and earphones on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000566973.jpg", "caption": "a mirror vase with red roses sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000140990.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[369, 200], [364, 206], [369, 215], [374, 235], [394, 239], [397, 236], [396, 226], [395, 217], [393, 205]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5C", "recog_valid": false, "glyph_recog_text": "一、", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173759.jpg", "caption": "a motorcycle with an american flag painted on the side", "annotations": [{"polygon": [[89, 342], [84, 344], [86, 352], [93, 362], [102, 368], [110, 373], [114, 368], [105, 364], [97, 357]], "text": "DUNLOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HOTNAC", "recog_valid": false, "glyph_recog_text": "OULOA", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108238.jpg", "caption": "a group of children eating lunch in a classroom", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141013.jpg", "caption": "a man riding a motorcycle with an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272095.jpg", "caption": "a woman wearing a headset and sitting at a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000173798.jpg", "caption": "a double decker bus with a statue in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337638.jpg", "caption": "a person holding a remote control", "annotations": [{"polygon": [[249, 341], [273, 323], [281, 328], [282, 339], [256, 358], [256, 358], [256, 358]], "text": "LEGO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CEGO", "recog_valid": false, "glyph_recog_text": "LEGO", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108266.jpg", "caption": "a car covered in stuffed animals", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042741.jpg", "caption": "a boy wearing a tie and a cat sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403222.jpg", "caption": "a blue bike parked next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272173.jpg", "caption": "a green and white airplane on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272176.jpg", "caption": "a bottle of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436023.jpg", "caption": "a desk with a microwave and a microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000108365.jpg", "caption": "a clock and a pair of gloves on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337761.jpg", "caption": "a group of people holding up their hands", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010084.jpg", "caption": "a train traveling on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000042867.jpg", "caption": "a computer desk with a computer, a television, a lamp, a chair and a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239502.jpg", "caption": "a white train traveling down the tracks near a town", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239518.jpg", "caption": "a person skiing down a slope with a flag in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239520.jpg", "caption": "a display of ties on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305056.jpg", "caption": "a clock hanging from a ceiling in a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000501696.jpg", "caption": "a doll wearing a red and black striped shirt and black pants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468932.jpg", "caption": "a man on a snowboard is standing on a slope", "annotations": [{"polygon": [[220, 219], [220, 219], [263, 214], [267, 253], [221, 254]], "text": "LANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LAE", "recog_valid": false, "glyph_recog_text": "LANE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567240.jpg", "caption": "1961 hartman luggage ad vintage print", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272335.jpg", "caption": "a large clock on the side of a building", "annotations": [{"polygon": [[177, 296], [199, 286], [207, 302], [196, 318]], "text": "VIII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "不", "recog_valid": false, "glyph_recog_text": "VI!I", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468956.jpg", "caption": "a young boy sitting in bed with books", "annotations": [{"polygon": [[288, 304], [381, 294], [380, 311], [289, 325]], "text": "GALLOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "0-", "recog_valid": false, "glyph_recog_text": "GALLOP", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141283.jpg", "caption": "a man holding a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567268.jpg", "caption": "a woman holding a large pizza with ham and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000468970.jpg", "caption": "a person cutting a pizza on a cutting board", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370669.jpg", "caption": "a man and woman standing under an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010230.jpg", "caption": "a man holding a baseball in his hand", "annotations": [{"polygon": [[214, 279], [224, 245], [242, 246], [261, 248], [276, 251], [292, 254], [298, 259], [304, 264], [302, 294], [289, 288], [268, 285], [247, 282], [229, 281]], "text": "CANADIANS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANADUAP", "recog_valid": false, "glyph_recog_text": "CANADIANS", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239611.jpg", "caption": "a large truck driving down a city street", "annotations": [{"polygon": [[177, 107], [174, 129], [172, 133], [244, 150], [246, 156], [251, 156], [257, 130], [209, 114], [178, 107]], "text": "Subway", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Subway", "recog_valid": true, "glyph_recog_text": "Subway", "glyph_recog_ld": 1.0}, {"polygon": [[401, 131], [409, 166], [485, 146], [486, 166], [489, 164], [489, 132], [457, 123], [433, 120], [401, 131]], "text": "Whitney", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Wkitiey", "recog_valid": false, "glyph_recog_text": "Whitney", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010265.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000337975.jpg", "caption": "a baseball player is swinging at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075841.jpg", "caption": "two young men playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436303.jpg", "caption": "a woodpecker is perched on a tree branch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010319.jpg", "caption": "a young boy is playing with a bat", "annotations": [{"polygon": [[291, 299], [337, 260], [347, 274], [308, 313]], "text": "REGOI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REGO", "recog_valid": false, "glyph_recog_text": "REGOI", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000206937.jpg", "caption": "a young boy eating a banana", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436319.jpg", "caption": "a shirt, a pair of scissors, and a pair of sunglasses", "annotations": [{"polygon": [[266, 227], [358, 236], [381, 238], [405, 244], [431, 248], [432, 269], [431, 270], [363, 260], [312, 255], [276, 252], [261, 249], [263, 226]], "text": "REARDEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "REARDEN", "recog_valid": true, "glyph_recog_text": "REARDEN", "glyph_recog_ld": 1.0}, {"polygon": [[326, 281], [377, 285], [403, 290], [448, 297], [449, 289], [434, 287], [431, 271], [365, 261], [326, 259]], "text": "STEEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "STEEL", "recog_valid": true, "glyph_recog_text": "STEEL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010342.jpg", "caption": "a little girl sitting in a chair looking out the window", "annotations": [{"polygon": [[365, 71], [371, 81], [336, 127], [329, 120]], "text": "SOUTHWEST", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SOUTHWEST", "recog_valid": true, "glyph_recog_text": "SCUTHNEAT", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534633.jpg", "caption": "two people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000075901.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272510.jpg", "caption": "a computer monitor sitting on a desk with a laptop and a mouse", "annotations": [{"polygon": [[238, 176], [243, 171], [270, 197], [265, 202]], "text": "SPEAKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SPEAKEK", "recog_valid": false, "glyph_recog_text": "cpcave", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436354.jpg", "caption": "a table with a bunch of fruit and vegetables", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000239803.jpg", "caption": "three boys standing next to a stop sign", "annotations": [{"polygon": [[49, 181], [49, 181], [181, 175], [186, 117], [52, 121], [46, 182]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010434.jpg", "caption": "a man on a skateboard doing a trick on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207048.jpg", "caption": "a group of people posing for a picture on a snowy mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174282.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370892.jpg", "caption": "two buses parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534735.jpg", "caption": "a cell phone, keys, and a rubber duck are on a bed", "annotations": [{"polygon": [[158, 266], [160, 268], [214, 241], [211, 238]], "text": "PIPE SCREENS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Lat deirde", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043232.jpg", "caption": "a group of people standing in a field with a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534756.jpg", "caption": "a small town with a small store and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076005.jpg", "caption": "a large teddy bear sitting on a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000370922.jpg", "caption": "a flatbed tow truck with four atvs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403692.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534772.jpg", "caption": "a group of people sitting in the stands with their hands up", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141566.jpg", "caption": "a woman walking down the sidewalk in front of a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403718.jpg", "caption": "a woman standing in front of a television playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338189.jpg", "caption": "a boat with a sail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305445.jpg", "caption": "a set of measuring spoons in a metal bowl", "annotations": [{"polygon": [[391, 271], [398, 279], [480, 232], [472, 222]], "text": "1 TABLESPOON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LABLESPOON", "recog_valid": false, "glyph_recog_text": "1TABLESPOON", "glyph_recog_ld": 0.8181819834709241}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338214.jpg", "caption": "a group of people playing a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436538.jpg", "caption": "a young girl sitting on a swing with a stuffed animal", "annotations": [{"polygon": [[200, 277], [219, 275], [239, 274], [265, 276], [288, 279], [313, 283], [331, 293], [338, 310], [325, 324], [302, 314], [284, 309], [269, 307], [251, 302], [236, 306], [227, 308], [217, 310], [202, 312]], "text": "Raymonds", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GOnOTDney", "recog_valid": false, "glyph_recog_text": "Raymonds", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174410.jpg", "caption": "a blue building with a boat in front of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502116.jpg", "caption": "two police officers riding horses down a busy street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272758.jpg", "caption": "a street sign is silhouetted against the setting sun", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076150.jpg", "caption": "a sign that says 20 zone", "annotations": [{"polygon": [[219, 224], [219, 224], [340, 246], [344, 281], [217, 262]], "text": "ZONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ZONE", "recog_valid": true, "glyph_recog_text": "ZONE", "glyph_recog_ld": 1.0}, {"polygon": [[275, 370], [305, 367], [351, 372], [358, 402], [348, 406], [276, 400]], "text": "LIVES!", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "LIVES", "recog_valid": false, "glyph_recog_text": "LIVES!", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[232, 161], [309, 180], [321, 171], [321, 147], [314, 131], [305, 125], [292, 122], [248, 109], [233, 114], [229, 124]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}, {"polygon": [[200, 361], [212, 359], [238, 360], [253, 364], [268, 367], [266, 385], [259, 397], [202, 392]], "text": "SAVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAVE", "recog_valid": true, "glyph_recog_text": "SAVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534906.jpg", "caption": "a man on skis holding a red shovel", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174462.jpg", "caption": "a city street at night with cars and buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043392.jpg", "caption": "a large clock with gold and blue decoration", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371072.jpg", "caption": "a van with a sign that says diary of a wimpy kid", "annotations": [{"polygon": [[184, 110], [196, 149], [256, 161], [265, 133], [190, 110], [184, 110]], "text": "Diary", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Diar", "recog_valid": false, "glyph_recog_text": "Diary", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[164, 179], [179, 217], [243, 226], [256, 195], [233, 188], [199, 180], [166, 179]], "text": "Wimpy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wimpy", "recog_valid": true, "glyph_recog_text": "Wimpy", "glyph_recog_ld": 1.0}, {"polygon": [[259, 179], [260, 217], [292, 215], [291, 187], [276, 182], [259, 179]], "text": "Kid", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Kid", "recog_valid": true, "glyph_recog_text": "K", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[261, 240], [264, 284], [292, 281], [290, 239], [262, 241]], "text": "13", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "92", "recog_valid": false, "glyph_recog_text": "o-(", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[183, 244], [185, 296], [217, 293], [213, 241]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "一", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207236.jpg", "caption": "a city street with people on bikes and cars", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502153.jpg", "caption": "a blue truck with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000403853.jpg", "caption": "a jockey rides a horse over a jump in front of a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000534926.jpg", "caption": "a black and white photo of three men on a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338321.jpg", "caption": "a large clock on a building with lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240022.jpg", "caption": "a pizza with ham and greens on top", "annotations": [{"polygon": [[339, 108], [343, 76], [447, 78], [447, 114]], "text": "Bchst", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "3chst", "recog_valid": false, "glyph_recog_text": "Bchst", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[411, 120], [447, 124], [448, 154], [404, 146]], "text": "m", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "m", "recog_valid": true, "glyph_recog_text": "m", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469431.jpg", "caption": "two snowboarders on a mountain", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240069.jpg", "caption": "a red fire hydrant sitting in a yard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371149.jpg", "caption": "a red and blue fire hydrant", "annotations": [{"polygon": [[205, 178], [214, 165], [222, 159], [236, 155], [246, 156], [244, 169], [228, 170], [220, 177], [216, 186]], "text": "STOPZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SOA", "recog_valid": false, "glyph_recog_text": "STOPZ", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567764.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[170, 260], [167, 297], [251, 300], [253, 266]], "text": "Phillies", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Phillieo", "recog_valid": false, "glyph_recog_text": "Phillies", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010716.jpg", "caption": "a sign that is on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305635.jpg", "caption": "two pizzas sitting on top of an oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000305638.jpg", "caption": "a plane is parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010727.jpg", "caption": "a group of people standing around a buffet table", "annotations": [{"polygon": [[129, 160], [160, 154], [166, 170], [201, 178], [202, 192], [132, 195], [135, 179], [126, 176]], "text": "POLM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "F20D", "recog_valid": false, "glyph_recog_text": "POLM", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076268.jpg", "caption": "a white and red bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371198.jpg", "caption": "a bathroom with a round tub and a sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109060.jpg", "caption": "a clock on the roof of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000272901.jpg", "caption": "a man riding a bike on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109078.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[212, 192], [201, 249], [121, 248], [125, 191]], "text": "EKEr", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EE2", "recog_valid": false, "glyph_recog_text": "EKEr", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076308.jpg", "caption": "a bus driving down a snowy road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567837.jpg", "caption": "a stop sign with a 4-way sign on it", "annotations": [{"polygon": [[361, 206], [366, 283], [146, 286], [153, 204]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[246, 392], [306, 391], [300, 420], [251, 421]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174627.jpg", "caption": "a baseball player in a green and white uniform is pitching a ball", "annotations": [{"polygon": [[297, 139], [347, 139], [339, 184], [293, 186]], "text": "ZZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2花", "recog_valid": false, "glyph_recog_text": "zz", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000141874.jpg", "caption": "a man and a child petting an elephant at a zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371302.jpg", "caption": "a woman with curly hair poses with a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436837.jpg", "caption": "a row of luggage on a shelf in a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000010890.jpg", "caption": "a group of people standing in front of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436878.jpg", "caption": "a blue train on the tracks", "annotations": [{"polygon": [[177, 284], [176, 312], [196, 314], [209, 324], [214, 325], [238, 318], [240, 287], [232, 277]], "text": "LOK Magazin", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "LOr egr", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109203.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[449, 104], [449, 137], [338, 143], [334, 135], [339, 109], [441, 103]], "text": "ROTEG", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "ROTEG", "recog_valid": true, "glyph_recog_text": "ROTEG", "glyph_recog_ld": 1.0}, {"polygon": [[422, 239], [411, 271], [415, 281], [447, 284], [448, 243]], "text": "c", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "C", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[350, 243], [361, 280], [396, 281], [403, 241]], "text": "K", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "K", "recog_valid": true, "glyph_recog_text": "K", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273053.jpg", "caption": "a hot dog and a drink on a table", "annotations": [{"polygon": [[2, 202], [3, 248], [413, 190], [418, 161], [229, 181], [224, 181], [223, 162], [130, 172], [127, 194]], "text": "ankfur", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ankfu", "recog_valid": false, "glyph_recog_text": " ankfur", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469661.jpg", "caption": "a cat is sitting on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436901.jpg", "caption": "a man in black leather riding a blue motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000567976.jpg", "caption": "a fire truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207542.jpg", "caption": "two dogs sitting in the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174794.jpg", "caption": "a woman sitting on a bench in a market area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338642.jpg", "caption": "a baby sitting on a couch with a remote control", "annotations": [{"polygon": [[317, 247], [310, 276], [319, 275], [322, 261], [343, 253], [347, 260], [350, 256], [350, 245], [354, 241], [353, 230]], "text": "Jung", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ang", "recog_valid": false, "glyph_recog_text": "Jung", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000502482.jpg", "caption": "a skateboarder is doing a trick on a rail", "annotations": [{"polygon": [[291, 469], [353, 462], [386, 450], [422, 469], [422, 492], [377, 493], [360, 509], [337, 496], [289, 493]], "text": "marplaskate", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "marpla kate", "recog_valid": false, "glyph_recog_text": "marplaskate", "glyph_recog_ld": 0.909090991735462}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535251.jpg", "caption": "a man wearing a helmet riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240344.jpg", "caption": "three men walking on the tarmac with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109281.jpg", "caption": "a man standing in a living room with a couch and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207595.jpg", "caption": "a man skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000436981.jpg", "caption": "a blue double decker bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043813.jpg", "caption": "a man in a tie and shirt standing in front of a truck", "annotations": [{"polygon": [[103, 456], [70, 474], [75, 486], [108, 466]], "text": "CDOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "CDOT", "recog_valid": true, "glyph_recog_text": "COOT", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043815.jpg", "caption": "a train station with several trains parked on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000568112.jpg", "caption": "a man sitting on a couch in a room with a fireplace", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174896.jpg", "caption": "a table with food and a laptop on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273205.jpg", "caption": "a lamp and a parking meter on the sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174909.jpg", "caption": "a white bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174911.jpg", "caption": "a blue motorcycle parked on the side of a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000404292.jpg", "caption": "a man and woman standing in a field holding a wine glass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109386.jpg", "caption": "a busy intersection with cars and pedestrians", "annotations": [{"polygon": [[511, 206], [454, 225], [453, 214], [511, 192]], "text": "EICHBE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EICHBE", "recog_valid": true, "glyph_recog_text": "EtCHEE", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273253.jpg", "caption": "a baseball jersey and glove are displayed in a display case", "annotations": [{"polygon": [[285, 218], [278, 240], [216, 234], [233, 172], [249, 174], [260, 179], [273, 215], [279, 217]], "text": "Cap", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Car", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[293, 205], [287, 239], [316, 239], [348, 235], [360, 228], [365, 210], [365, 201]], "text": "dinals", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "dinals", "recog_valid": true, "glyph_recog_text": "dinals", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000174952.jpg", "caption": "a person is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043936.jpg", "caption": "a group of police officers riding motorcycles down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469934.jpg", "caption": "a living room with a red chair and a television", "annotations": [{"polygon": [[429, 189], [433, 193], [436, 186], [444, 183], [450, 186], [458, 194], [461, 202], [455, 209], [457, 210], [460, 208], [464, 197], [457, 186], [450, 182], [441, 180]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000338872.jpg", "caption": "a man and two women are eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000043968.jpg", "caption": "a young boy is riding a snowboard on a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371647.jpg", "caption": "a clock mounted on the wall of a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011205.jpg", "caption": "a cat sitting on the back of a motorcycle", "annotations": [{"polygon": [[463, 347], [419, 404], [407, 389], [453, 331]], "text": "55-XH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "55-XH", "recog_valid": true, "glyph_recog_text": "55-XH", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273358.jpg", "caption": "a sign that says nicholas barber shop", "annotations": [{"polygon": [[109, 193], [109, 193], [151, 183], [215, 178], [286, 177], [351, 176], [399, 181], [428, 187], [440, 157], [422, 147], [373, 143], [330, 139], [283, 138], [232, 137], [190, 140], [154, 143], [126, 146], [97, 156]], "text": "NIChoLas", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NIChOLas", "recog_valid": false, "glyph_recog_text": "NIChoLas", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[176, 275], [359, 272], [365, 286], [360, 307], [348, 331], [292, 333], [248, 335], [230, 335], [175, 338], [170, 305]], "text": "SHOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SHOP", "recog_valid": true, "glyph_recog_text": "SHOP", "glyph_recog_ld": 1.0}, {"polygon": [[99, 207], [143, 196], [209, 195], [320, 191], [375, 195], [401, 198], [422, 200], [424, 255], [419, 257], [394, 253], [365, 250], [350, 250], [300, 250], [244, 251], [190, 255], [122, 278]], "text": "BARBER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BARBER", "recog_valid": true, "glyph_recog_text": "BARBER", "glyph_recog_ld": 1.0}, {"polygon": [[232, 415], [202, 488], [179, 476], [218, 408], [227, 409]], "text": "Since", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "Since", "recog_valid": true, "glyph_recog_text": "Since", "glyph_recog_ld": 1.0}, {"polygon": [[345, 406], [371, 462], [346, 464], [321, 418], [339, 404]], "text": "1967", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "1967", "recog_valid": true, "glyph_recog_text": "1967", "glyph_recog_ld": 1.0}, {"polygon": [[66, 94], [67, 110], [78, 116], [100, 110], [181, 82], [187, 65], [182, 46], [169, 37], [107, 59]], "text": "Grill", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "o.Grill", "recog_valid": false, "glyph_recog_text": "Grill", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000469975.jpg", "caption": "a yellow bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000273379.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240632.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306169.jpg", "caption": "a group of stuffed animals in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371717.jpg", "caption": "a man wearing a hood", "annotations": [{"polygon": [[219, 78], [211, 61], [221, 55], [240, 48], [253, 48], [264, 51], [277, 58], [265, 72]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "包", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[202, 89], [212, 80], [228, 73], [239, 71], [252, 70], [268, 70], [279, 74], [292, 81], [281, 94], [274, 91], [262, 90], [249, 90], [238, 91], [230, 93], [216, 103]], "text": "Birthday", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Birthday", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011271.jpg", "caption": "a man holding up a large pizza", "annotations": [{"polygon": [[238, 216], [236, 243], [250, 244], [266, 233], [279, 233], [292, 240], [300, 245], [304, 252], [305, 260], [314, 265], [323, 261], [323, 251], [321, 238], [316, 230], [308, 224], [300, 222], [293, 219], [283, 229], [281, 222], [278, 217], [274, 213], [264, 212]], "text": "AEROPOSTALE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ASPAY", "recog_valid": false, "glyph_recog_text": "AEROPOSTALE", "glyph_recog_ld": 0.2727279338836964}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011292.jpg", "caption": "a man holding a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437276.jpg", "caption": "a red fire truck parked in front of a building", "annotations": [{"polygon": [[394, 268], [403, 268], [412, 270], [444, 282], [446, 265], [444, 258], [412, 254], [391, 251]], "text": "Restaurant", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Restasrarn", "recog_valid": false, "glyph_recog_text": "Resia ui and", "glyph_recog_ld": 0.41666715277737265}, {"polygon": [[317, 285], [338, 273], [354, 268], [354, 256], [348, 253], [320, 253], [313, 254], [313, 270]], "text": "Ferrards", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Feraetn", "recog_valid": false, "glyph_recog_text": "Ferards", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470068.jpg", "caption": "a microwave and a table with food on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000207935.jpg", "caption": "a man jumping in the air to catch a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306240.jpg", "caption": "a baseball game in progress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240709.jpg", "caption": "additional photo for property listing at 515 n wabash street wabash,  indiana, in  52101 estados", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371786.jpg", "caption": "a man and woman in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371807.jpg", "caption": "a bathroom with a toilet and a cart with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000076923.jpg", "caption": "a street sign and traffic light in front of a tall building", "annotations": [{"polygon": [[0, 173], [234, 211], [233, 248], [0, 214]], "text": "LEECKER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LEECKER", "recog_valid": true, "glyph_recog_text": "LEECKER", "glyph_recog_ld": 1.0}, {"polygon": [[396, 126], [400, 140], [410, 129], [438, 64], [426, 64]], "text": "EKEA", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "SKEA", "recog_valid": false, "glyph_recog_text": "EKEA", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[367, 109], [371, 135], [380, 124], [407, 64], [385, 64]], "text": "LIV", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "2", "recog_valid": false, "glyph_recog_text": "LIV", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044178.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109738.jpg", "caption": "a man sitting in the back of a red truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240847.jpg", "caption": "a crane lifting a large green truck on a grassy field", "annotations": [{"polygon": [[168, 200], [195, 155], [199, 163], [173, 207]], "text": "MALS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MALS", "recog_valid": true, "glyph_recog_text": "荣小告", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[246, 163], [279, 195], [275, 198], [242, 166]], "text": "MAL'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "m", "recog_valid": false, "glyph_recog_text": "ha.i", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044247.jpg", "caption": "y ole 'olie's nutella's cake shop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000371927.jpg", "caption": "a pile of wires and computer parts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339189.jpg", "caption": "a man skiing down a snow covered slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000240889.jpg", "caption": "a man in a suit holding up two cell phones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011519.jpg", "caption": "a horse pulling a cart with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306431.jpg", "caption": "a street sign with people walking around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077058.jpg", "caption": "a display of fruit", "annotations": [{"polygon": [[386, 111], [375, 130], [389, 142], [436, 135], [442, 126], [440, 108]], "text": "2'99", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "299", "recog_valid": false, "glyph_recog_text": "2'99", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[222, 120], [208, 145], [218, 156], [236, 155], [275, 138], [272, 120]], "text": "4'50", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "450", "recog_valid": false, "glyph_recog_text": "4'50", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[76, 144], [60, 179], [109, 171], [122, 166], [117, 141]], "text": "149", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "14o", "recog_valid": false, "glyph_recog_text": "149", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000109835.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437516.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000535820.jpg", "caption": "a family of people in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437522.jpg", "caption": "a man on a bicycle is standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011569.jpg", "caption": "a pink double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437578.jpg", "caption": "a train is parked at a station in the middle of the day", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011613.jpg", "caption": "a man on skis in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437607.jpg", "caption": "a train is parked on the tracks next to some other trains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011631.jpg", "caption": "a person on a dirt bike doing a jump", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044414.jpg", "caption": "a man eating a piece of food", "annotations": [{"polygon": [[347, 397], [357, 408], [387, 406], [447, 390], [448, 375], [414, 377]], "text": "Computer.", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "2mputer", "recog_valid": false, "glyph_recog_text": "Computer.", "glyph_recog_ld": 0.6666670370366254}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000077185.jpg", "caption": "a yellow car with luggage on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372136.jpg", "caption": "a woman in a pink wetsuit and a dog on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175530.jpg", "caption": "a man standing in front of a clock tower", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044463.jpg", "caption": "roundabout 20 mph sign", "annotations": [{"polygon": [[182, 296], [255, 294], [256, 341], [182, 335]], "text": "20", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "20", "recog_valid": true, "glyph_recog_text": "20", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175546.jpg", "caption": "a woman with glasses and a man eating food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470462.jpg", "caption": "a man is walking a horse down a path", "annotations": [{"polygon": [[158, 242], [157, 262], [169, 262], [182, 259], [184, 257], [189, 256], [195, 253], [197, 247], [194, 230], [185, 235], [179, 238], [170, 241]], "text": "RAPS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RAS", "recog_valid": false, "glyph_recog_text": "RAPS", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208319.jpg", "caption": "a stop sign with a message written on it", "annotations": [{"polygon": [[229, 207], [289, 200], [293, 175], [225, 182]], "text": "DON'T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DONT", "recog_valid": false, "glyph_recog_text": "DON'T", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[196, 276], [309, 261], [331, 223], [323, 199], [194, 218], [188, 234], [184, 267]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470458.jpg", "caption": "a small plane sitting on top of a grassy field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000011720.jpg", "caption": "a person sitting at a desk with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000306640.jpg", "caption": "a young boy standing in front of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175608.jpg", "caption": "a delta airplane taking off from an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536058.jpg", "caption": "a table with breakfast food and drinks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339454.jpg", "caption": "new orleans wedding photographer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044580.jpg", "caption": "a young boy is standing next to a fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208513.jpg", "caption": "a display of donuts on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000142977.jpg", "caption": "a man on a skateboard doing a trick on a rail", "annotations": [{"polygon": [[174, 259], [177, 380], [224, 384], [230, 398], [468, 389], [469, 336]], "text": "DTELOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DTELOG", "recog_valid": true, "glyph_recog_text": "DTELOG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000339589.jpg", "caption": "a city street with parked cars and a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175751.jpg", "caption": "a bike lane on a street with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437908.jpg", "caption": "a wooden bench sitting on a wooden boardwalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405145.jpg", "caption": "a large white airplane parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437944.jpg", "caption": "a woman cutting a cake", "annotations": [{"polygon": [[393, 418], [436, 398], [446, 415], [405, 436]], "text": "SANTT", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "SANTI", "recog_valid": false, "glyph_recog_text": "SANTT", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569017.jpg", "caption": "a boat in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000437970.jpg", "caption": "a group of people riding horses on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536278.jpg", "caption": "a display of teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536286.jpg", "caption": "a baseball game with a batter at bat and a catcher", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503520.jpg", "caption": "a blue bus on the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241388.jpg", "caption": "a large inflatable yellow fire hydrant in the parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274160.jpg", "caption": "a person riding a dirt bike on a dirt track", "annotations": [{"polygon": [[118, 238], [127, 225], [162, 189], [178, 177], [187, 191], [170, 206], [153, 224], [126, 251]], "text": "TRACK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ROY", "recog_valid": false, "glyph_recog_text": "TRACK", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143091.jpg", "caption": "a blue train is on the tracks in a rural area", "annotations": [{"polygon": [[242, 249], [242, 277], [275, 277], [278, 243]], "text": "LTE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "星", "recog_valid": false, "glyph_recog_text": "LTE", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438004.jpg", "caption": "a bunch of bananas hanging from a wire rack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241408.jpg", "caption": "a table with a sandwich on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012044.jpg", "caption": "a person cutting a cake on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470821.jpg", "caption": "two glasses of red wine on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000175912.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208714.jpg", "caption": "two pizzas on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307032.jpg", "caption": "a firefighter standing next to a meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000503640.jpg", "caption": "a man walking down the street with a clock on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438111.jpg", "caption": "thomas the tank engine at sydney railway station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044901.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208752.jpg", "caption": "a man swinging a tennis racket at a tennis ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470907.jpg", "caption": "an elephant with tusks walking through a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208786.jpg", "caption": "a man in a wet suit riding a surfboard in a river", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307091.jpg", "caption": "a collage of various items including a chair, clock, lamp, and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470933.jpg", "caption": "a black and white photo of a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012183.jpg", "caption": "an old airplane sitting on the ground in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470937.jpg", "caption": "a woman standing at a red light in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143258.jpg", "caption": "a table with a glass of orange juice and a plate of croissants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000044960.jpg", "caption": "a black and white photo of a double decker bus", "annotations": [{"polygon": [[182, 348], [182, 348], [308, 369], [334, 342], [324, 339], [253, 330], [197, 337], [183, 344]], "text": "STOPI", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GS", "recog_valid": false, "glyph_recog_text": "STOPI", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[134, 361], [35, 408], [22, 419], [88, 433], [230, 432], [279, 390]], "text": "BUS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "J29", "recog_valid": false, "glyph_recog_text": "BUS", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438187.jpg", "caption": "a clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438196.jpg", "caption": "a group of people walking down the street with elephants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012233.jpg", "caption": "a motorcycle parked next to a car and a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000470988.jpg", "caption": "a green bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405456.jpg", "caption": "a man with sunglasses", "annotations": [{"polygon": [[105, 196], [105, 196], [105, 196], [265, 179], [304, 95], [108, 114], [105, 196]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274386.jpg", "caption": "a man in a black shirt and orange shorts is holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176084.jpg", "caption": "a man holding a remote control in his hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307194.jpg", "caption": "a parking meter with graffiti on it", "annotations": [{"polygon": [[301, 334], [307, 352], [344, 331], [337, 316]], "text": "poor", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Poor", "recog_valid": false, "glyph_recog_text": "poor", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110593.jpg", "caption": "a desk with a laptop and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000372759.jpg", "caption": "a black cat sitting in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208931.jpg", "caption": "a woman in a bunny costume is laying on a bed with a man dressed as an easter bunny", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000536619.jpg", "caption": "a hawk sitting on a bench with people sitting on the bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438331.jpg", "caption": "a group of people riding motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438348.jpg", "caption": "a woman holding an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569433.jpg", "caption": "a group of people kiteboarding on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000208994.jpg", "caption": "a siamese cat looking at itself in the mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000241764.jpg", "caption": "a person standing in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143490.jpg", "caption": "a group of people walking in the snow", "annotations": [{"polygon": [[274, 87], [270, 118], [320, 117], [318, 85]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "iA", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012418.jpg", "caption": "a woman walking down a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438422.jpg", "caption": "a group of people standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471191.jpg", "caption": "a yellow and white fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307352.jpg", "caption": "a yellow dump truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471217.jpg", "caption": "a bathroom with boxes and a toilet", "annotations": [{"polygon": [[430, 77], [423, 117], [444, 113], [449, 102], [465, 103], [467, 79]], "text": "Mytha's", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "恤", "recog_valid": false, "glyph_recog_text": "MAgnaes", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045259.jpg", "caption": "a woman eating a hot dog in a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110809.jpg", "caption": "a table with a blue and white checkered table cloth and a cup of coffee and a spoon", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000110819.jpg", "caption": "a man on a snowboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045292.jpg", "caption": "a woman is standing in the aisle of an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569584.jpg", "caption": "a display of cakes in a store with a clear cover", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078065.jpg", "caption": "a man holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209178.jpg", "caption": "a woman and a boy sitting at a table with a cake", "annotations": [{"polygon": [[315, 331], [316, 362], [262, 365], [264, 331]], "text": "38", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "88", "recog_valid": false, "glyph_recog_text": "38", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340252.jpg", "caption": "an air canada airplane is parked at the gate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176440.jpg", "caption": "a man on a bike in a flooded street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176442.jpg", "caption": "a white and red uhaul truck parked in a parking lot", "annotations": [{"polygon": [[190, 123], [192, 148], [344, 120], [344, 113], [329, 115], [328, 94]], "text": "UHAUL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UHAUL", "recog_valid": true, "glyph_recog_text": "UHAUL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373078.jpg", "caption": "a baseball player throwing a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471409.jpg", "caption": "a large pizza on a table", "annotations": [{"polygon": [[154, 64], [154, 75], [162, 213], [141, 213], [126, 160], [118, 64]], "text": "roops", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "roops", "recog_valid": true, "glyph_recog_text": "LooQ", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078196.jpg", "caption": "a man sitting in a chair with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143745.jpg", "caption": "beware dog", "annotations": [{"polygon": [[26, 239], [266, 207], [250, 243], [27, 283]], "text": "BEWARE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PEWARE", "recog_valid": false, "glyph_recog_text": "BEWARE", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[104, 279], [247, 255], [250, 268], [240, 280], [227, 286], [108, 307]], "text": "DOG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DOG", "recog_valid": true, "glyph_recog_text": "D O G", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[14, 396], [36, 392], [47, 418], [26, 423]], "text": "XING", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "9NX", "recog_valid": false, "glyph_recog_text": "XING", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000045446.jpg", "caption": "a car is stopped at a stop light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143758.jpg", "caption": "a street sign on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340375.jpg", "caption": "a street sign that says foundation drive", "annotations": [{"polygon": [[108, 276], [102, 313], [148, 305], [172, 299], [188, 296], [202, 294], [226, 289], [235, 288], [253, 283], [265, 282], [288, 277], [322, 270], [330, 269], [347, 265], [348, 227], [348, 225], [326, 229], [316, 232], [289, 238], [276, 241], [248, 248], [226, 252], [201, 256]], "text": "FOUNDATION", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FOUNDATION", "recog_valid": true, "glyph_recog_text": "FOUNDATION", "glyph_recog_ld": 1.0}, {"polygon": [[373, 220], [374, 239], [410, 233], [442, 226], [442, 205]], "text": "DRIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DRIVE", "recog_valid": true, "glyph_recog_text": "DRIVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471453.jpg", "caption": "a dog sitting in the cab of a truck", "annotations": [{"polygon": [[183, 146], [174, 170], [273, 218], [283, 199]], "text": "Wayne", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Wayne", "recog_valid": true, "glyph_recog_text": "Wayne", "glyph_recog_ld": 1.0}, {"polygon": [[285, 200], [276, 219], [325, 244], [334, 228]], "text": "Catt", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Catt", "recog_valid": true, "glyph_recog_text": "Catt", "glyph_recog_ld": 1.0}, {"polygon": [[207, 192], [208, 199], [303, 243], [307, 238]], "text": "TRANSPORT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TRANSPORT", "recog_valid": true, "glyph_recog_text": "tspedhus.", "glyph_recog_ld": 1.1111098765503868e-06}, {"polygon": [[150, 455], [167, 458], [164, 473], [158, 492], [149, 512], [136, 512], [142, 499], [148, 481], [150, 471]], "text": "BRID", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "BID", "recog_valid": false, "glyph_recog_text": "0188", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471468.jpg", "caption": "a train with skis on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274860.jpg", "caption": "a group of motorcycles parked in front of a monument", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340406.jpg", "caption": "a group of people waiting at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307656.jpg", "caption": "a man in a suit and tie standing in front of a jail sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176587.jpg", "caption": "three women and a baby sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471508.jpg", "caption": "a young boy in a blue jacket and blue pants", "annotations": [{"polygon": [[132, 512], [179, 482], [184, 496], [150, 512]], "text": "eH", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "e!", "recog_valid": false, "glyph_recog_text": "", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000274916.jpg", "caption": "a woman kneeling down next to a black dog", "annotations": [{"polygon": [[47, 333], [34, 361], [38, 365], [133, 360], [177, 355], [237, 349], [275, 341], [264, 317], [233, 322], [208, 323], [171, 327], [139, 327], [102, 330]], "text": "BOSTON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BOSTON", "recog_valid": true, "glyph_recog_text": "BOSTON", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000405991.jpg", "caption": "a man and a woman walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504298.jpg", "caption": "a group of elephants are being led by a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000471554.jpg", "caption": "a man's feet are on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143874.jpg", "caption": "an old photo of a silver trailer with chairs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569867.jpg", "caption": "a woman posing for a photo next to a stop sign", "annotations": [{"polygon": [[339, 143], [338, 170], [345, 174], [399, 174], [411, 157], [412, 150], [409, 144]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[104, 136], [101, 163], [106, 168], [162, 170], [175, 155], [175, 144], [170, 141]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012822.jpg", "caption": "a man on skis is racing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406055.jpg", "caption": "a baseball game with a batter and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000012839.jpg", "caption": "a young boy playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078380.jpg", "caption": "a young girl sitting at a table with a hot dog and a soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143921.jpg", "caption": "a red double decker bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569919.jpg", "caption": "a table topped with a pizza, a slice of cake and a drink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000143958.jpg", "caption": "a group of women sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176728.jpg", "caption": "a clock hanging from a pole in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537196.jpg", "caption": "a small pizza in a box", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275057.jpg", "caption": "a skateboarder is doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000569975.jpg", "caption": "a cat and bird in a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340602.jpg", "caption": "a person on a snowboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000438909.jpg", "caption": "a large passenger jet taking off from an airport runway", "annotations": [{"polygon": [[125, 249], [125, 249], [181, 264], [184, 244], [126, 231]], "text": "orbest", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "orbest", "recog_valid": true, "glyph_recog_text": "orbest", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176765.jpg", "caption": "a man riding a bike next to a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537222.jpg", "caption": "a kitchen with a refrigerator, microwave, and stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373414.jpg", "caption": "people standing in front of a bus on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340659.jpg", "caption": "a man in an apron cooking food in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504524.jpg", "caption": "a black and white photo of a sign that says do not enter", "annotations": [{"polygon": [[157, 145], [157, 145], [204, 143], [216, 145], [223, 154], [224, 168], [220, 182], [210, 186], [157, 185]], "text": "DO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DO", "recog_valid": true, "glyph_recog_text": "DO", "glyph_recog_ld": 1.0}, {"polygon": [[252, 146], [305, 144], [348, 147], [351, 148], [351, 154], [340, 154], [340, 187], [251, 186]], "text": "NOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NOT", "recog_valid": true, "glyph_recog_text": "NOT", "glyph_recog_ld": 1.0}, {"polygon": [[169, 262], [322, 264], [330, 265], [335, 269], [337, 276], [337, 282], [331, 286], [337, 303], [169, 303]], "text": "ENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENTER", "recog_valid": true, "glyph_recog_text": "ENTER", "glyph_recog_ld": 1.0}, {"polygon": [[164, 385], [323, 389], [331, 391], [333, 394], [334, 404], [333, 412], [329, 415], [323, 417], [171, 414]], "text": "WRONG", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WRONG", "recog_valid": true, "glyph_recog_text": "WRONG", "glyph_recog_ld": 1.0}, {"polygon": [[195, 434], [201, 463], [291, 466], [305, 437]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537293.jpg", "caption": "a woman holding a baby while she is blow drying her hair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144091.jpg", "caption": "a green bus and a yellow bus on a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570078.jpg", "caption": "a man on a skateboard is going through cones", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209639.jpg", "caption": "a man wearing a suit and tie", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373492.jpg", "caption": "a sign that says stop harper", "annotations": [{"polygon": [[167, 216], [314, 228], [316, 301], [166, 292]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[131, 302], [344, 318], [347, 389], [131, 374]], "text": "HARPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HARPER", "recog_valid": true, "glyph_recog_text": "HARPER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000307968.jpg", "caption": "a group of people playing a video game in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340736.jpg", "caption": "a buffet with many different types of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111369.jpg", "caption": "a suv is parked in the snow next to a parking meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275211.jpg", "caption": "a man selling hot dogs on a street corner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242446.jpg", "caption": "a man holding a frisbee in a convention hall", "annotations": [{"polygon": [[200, 286], [199, 310], [299, 272], [286, 252]], "text": "Radical", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Radical", "recog_valid": true, "glyph_recog_text": "Radical", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111385.jpg", "caption": "a man holding a surfboard on a beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242457.jpg", "caption": "a man and a woman are talking to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000340763.jpg", "caption": "a white motorcycle parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275245.jpg", "caption": "a sign for frisco grill street on the side of a building", "annotations": [{"polygon": [[331, 96], [333, 130], [177, 164], [174, 127]], "text": "FRISCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRISCO", "recog_valid": true, "glyph_recog_text": "FRISCO", "glyph_recog_ld": 1.0}, {"polygon": [[376, 170], [385, 281], [152, 325], [140, 311], [138, 216]], "text": "GRILLE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GRILLE", "recog_valid": true, "glyph_recog_text": "GRILLE", "glyph_recog_ld": 1.0}, {"polygon": [[350, 358], [343, 392], [189, 415], [189, 382]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}, {"polygon": [[336, 465], [356, 482], [351, 511], [311, 511], [259, 475]], "text": "ET", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "ET", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000176943.jpg", "caption": "a street lamp and a sign on a brick wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111422.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275263.jpg", "caption": "a clock is shown in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078667.jpg", "caption": "a yellow school bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373580.jpg", "caption": "a man holding a tennis racket and eating a piece of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504664.jpg", "caption": "a group of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000275292.jpg", "caption": "boats are docked at the shore of a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013176.jpg", "caption": "a man with a ponytail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177023.jpg", "caption": "an old black and white photo of a plane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111490.jpg", "caption": "a young boy standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406414.jpg", "caption": "the ark veterinary hospital", "annotations": [{"polygon": [[42, 303], [44, 320], [159, 301], [159, 286], [42, 303]], "text": "VETERINARY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "VETERINARY", "recog_valid": true, "glyph_recog_text": "VETERINARY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406420.jpg", "caption": "a man sitting on a bench reading a newspaper", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000209822.jpg", "caption": "a man holding a surfboard", "annotations": [{"polygon": [[237, 175], [253, 173], [253, 173], [260, 124], [260, 124], [247, 108], [247, 108], [306, 117], [306, 117], [302, 123], [302, 123], [288, 125], [288, 125], [276, 194], [276, 194], [267, 192], [267, 192], [239, 192]], "text": "Surf", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "yums", "recog_valid": false, "glyph_recog_text": "0二", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504736.jpg", "caption": "a clock on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046002.jpg", "caption": "a laptop with a video screen showing a group of people", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504766.jpg", "caption": "a laptop on a bed with a phone on the side", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242625.jpg", "caption": "a person is kite surfing in the ocean", "annotations": [{"polygon": [[218, 98], [240, 119], [234, 128], [209, 104]], "text": "REBEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "P", "recog_valid": false, "glyph_recog_text": "RE6EL", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308175.jpg", "caption": "a man on a motorcycle riding down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000373729.jpg", "caption": "a cat sitting in a sink", "annotations": [{"polygon": [[396, 318], [391, 329], [392, 337], [399, 344], [404, 349], [418, 355], [425, 341]], "text": "cola", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "eela", "recog_valid": false, "glyph_recog_text": "cola", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000078828.jpg", "caption": "a toilet with a ruler on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406550.jpg", "caption": "a man holding a piece of pizza on a paper plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013336.jpg", "caption": "a bathroom with a toilet, a water tank and a water heater", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000504861.jpg", "caption": "an old book with a picture of a refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000242721.jpg", "caption": "a parking meter in the snow", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472108.jpg", "caption": "a stop sign with a bush growing on it", "annotations": [{"polygon": [[237, 142], [326, 143], [330, 162], [310, 183], [233, 182]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111661.jpg", "caption": "a red train at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406579.jpg", "caption": "a red and yellow bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308277.jpg", "caption": "a group of people in red shirts posing with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341061.jpg", "caption": "a busy city street at night", "annotations": [{"polygon": [[367, 158], [367, 195], [394, 195], [396, 156], [366, 154]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "卜", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177255.jpg", "caption": "a plane flying through a blue sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210048.jpg", "caption": "a cake decorated with a plane", "annotations": [{"polygon": [[183, 438], [183, 467], [213, 470], [241, 474], [273, 474], [286, 473], [301, 446], [295, 444], [282, 449], [251, 450], [223, 446]], "text": "MERRY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MERRY", "recog_valid": true, "glyph_recog_text": "MERRY", "glyph_recog_ld": 1.0}, {"polygon": [[292, 472], [301, 447], [354, 432], [376, 423], [385, 443], [368, 452], [342, 461], [322, 467]], "text": "YXMAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "XMAS", "recog_valid": false, "glyph_recog_text": "YXMAS", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[386, 442], [387, 418], [405, 408], [424, 394], [437, 380], [444, 369], [451, 361], [443, 390], [434, 403], [420, 419]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "HAPE", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.6000007999984}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000406662.jpg", "caption": "a man in white shirt and black shorts playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046260.jpg", "caption": "a red truck with a blue tarp on it", "annotations": [{"polygon": [[46, 89], [43, 119], [175, 131], [178, 112], [109, 97], [70, 87]], "text": "SE-VENDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "SE-VENDE", "recog_valid": true, "glyph_recog_text": "SE-VENDE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472250.jpg", "caption": "a truck with a large elephant on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439482.jpg", "caption": "a large truck with a house on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013528.jpg", "caption": "a train is pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013547.jpg", "caption": "a bathroom with a large shower and sink", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144620.jpg", "caption": "a couple sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000046322.jpg", "caption": "two people eating food in front of a blue wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308475.jpg", "caption": "a large airplane sitting on the tarmac", "annotations": [{"polygon": [[216, 194], [216, 211], [270, 224], [266, 211]], "text": "AIR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AIR", "recog_valid": true, "glyph_recog_text": "AIR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505098.jpg", "caption": "a baseball player is holding a bat and standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374072.jpg", "caption": "a man holding a hot dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111936.jpg", "caption": "people are boarding a train at a station", "annotations": [{"polygon": [[4, 92], [100, 123], [102, 112], [81, 99], [61, 92], [10, 85]], "text": "coachclass", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "coachclass", "recog_valid": true, "glyph_recog_text": "coachcleas", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144703.jpg", "caption": "a refrigerator with magnets and pictures on it", "annotations": [{"polygon": [[59, 172], [59, 172], [121, 170], [134, 137], [65, 143]], "text": "Randy", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Randy", "recog_valid": true, "glyph_recog_text": "Randy", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013636.jpg", "caption": "a dog and a cat laying on a bed", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000537925.jpg", "caption": "a large sign that is on the side of a street", "annotations": [{"polygon": [[218, 124], [203, 148], [240, 157], [306, 152], [307, 147], [302, 139], [272, 127]], "text": "Skyliner", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Skyliner", "recog_valid": true, "glyph_recog_text": "Skyliner", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341328.jpg", "caption": "a blue teddy bear sitting on top of a pile of garbage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210267.jpg", "caption": "three cell phones on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000111967.jpg", "caption": "a man and a woman are playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341359.jpg", "caption": "a stuffed bear wearing a sweater and jeans", "annotations": [{"polygon": [[146, 241], [228, 361], [193, 386], [110, 268]], "text": "HERSHEY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HERSHEYS", "recog_valid": false, "glyph_recog_text": "HERSHEY'S", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[167, 370], [134, 323], [143, 318], [165, 348], [173, 365]], "text": "Chocolate", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Chocolate", "recog_valid": true, "glyph_recog_text": "Chuodee", "glyph_recog_ld": 0.44444506172770915}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570738.jpg", "caption": "a baseball player standing on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308600.jpg", "caption": "a man painting an elephant with colorful designs", "annotations": [{"polygon": [[298, 368], [299, 237], [370, 236], [377, 368]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "co", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079224.jpg", "caption": "a large display of fruit", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439678.jpg", "caption": "a group of people in red shirts standing together with a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570768.jpg", "caption": "a man playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505234.jpg", "caption": "a little girl sitting at a table with two plates of pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013715.jpg", "caption": "a bicycle is parked next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000308635.jpg", "caption": "a person is looking at a display of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112040.jpg", "caption": "a green and black truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243120.jpg", "caption": "a group of giraffes standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374192.jpg", "caption": "a street sweeper on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144831.jpg", "caption": "a large airplane parked on a runway", "annotations": [{"polygon": [[99, 327], [108, 321], [142, 366], [126, 374]], "text": "easyJet", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "easyJet", "recog_valid": true, "glyph_recog_text": "easyJet", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210389.jpg", "caption": "a woman in a white dress standing on a sidewalk", "annotations": [{"polygon": [[306, 184], [353, 192], [358, 185], [364, 182], [361, 174], [312, 162], [307, 169], [303, 173]], "text": "SUBWAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SUBUUAY", "recog_valid": false, "glyph_recog_text": "SUIBWAY", "glyph_recog_ld": 0.5714291836725947}, {"polygon": [[67, 38], [67, 67], [97, 75], [94, 49]], "text": "ta", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "ta", "recog_valid": true, "glyph_recog_text": "ta", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570852.jpg", "caption": "a small plane with a red and white paint job", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000472551.jpg", "caption": "a street sign and a traffic light with palm trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243187.jpg", "caption": "a group of zebras standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570874.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000570878.jpg", "caption": "a group of rams standing in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000439838.jpg", "caption": "an orange airplane with a propeller on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505384.jpg", "caption": "a living room with a couch, a table, and a mirror", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407091.jpg", "caption": "a clock on a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000144994.jpg", "caption": "a double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000374391.jpg", "caption": "a baby sleeping with teddy bears", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407159.jpg", "caption": "a group of girls playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112252.jpg", "caption": "a delta airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538250.jpg", "caption": "a black and white photo of a tram on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000013983.jpg", "caption": "a pizza shop with a neon sign", "annotations": [{"polygon": [[91, 154], [156, 115], [168, 141], [127, 174], [90, 199], [78, 194]], "text": "med", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ea", "recog_valid": false, "glyph_recog_text": "med", "glyph_recog_ld": 0.3333355555481482}, {"polygon": [[148, 160], [163, 152], [201, 153], [206, 163], [279, 168], [282, 183], [227, 210], [138, 189]], "text": "PIZZA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "pigea", "recog_valid": false, "glyph_recog_text": "PIZZA", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[380, 106], [419, 111], [413, 142], [380, 140]], "text": "BY", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "BY", "recog_valid": true, "glyph_recog_text": "BY", "glyph_recog_ld": 1.0}, {"polygon": [[431, 111], [487, 116], [488, 147], [440, 142]], "text": "THE", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "THE", "recog_valid": true, "glyph_recog_text": "THE", "glyph_recog_ld": 1.0}, {"polygon": [[19, 64], [15, 136], [50, 158], [53, 99], [42, 85], [27, 70]], "text": "ATM", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "<-2", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276168.jpg", "caption": "a large military plane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112385.jpg", "caption": "a man is standing on the beach with a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000177922.jpg", "caption": "a train pulling into a station with a red and yellow train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571141.jpg", "caption": "a skateboarder doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210703.jpg", "caption": "a man is walking in front of a mall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145178.jpg", "caption": "a man in a black shirt standing in an office", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505637.jpg", "caption": "a close up of a pink stove with a meter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505642.jpg", "caption": "a baby sitting on the floor in front of an open refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079671.jpg", "caption": "a no parking sign on a street corner", "annotations": [{"polygon": [[173, 238], [232, 208], [234, 228], [173, 255]], "text": "PARKING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PARKING", "recog_valid": true, "glyph_recog_text": "PARKING", "glyph_recog_ld": 1.0}, {"polygon": [[172, 263], [205, 247], [206, 267], [173, 280]], "text": "HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HERE", "recog_valid": true, "glyph_recog_text": "HERE", "glyph_recog_ld": 1.0}, {"polygon": [[175, 285], [228, 264], [230, 285], [172, 307]], "text": "CORNER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CORNER", "recog_valid": true, "glyph_recog_text": "CORNER", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210751.jpg", "caption": "a woman and a boy sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178020.jpg", "caption": "a man in a wetsuit holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407413.jpg", "caption": "a kitchen and dining area in a small apartment", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210806.jpg", "caption": "a large white airplane sitting on top of an airport tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538517.jpg", "caption": "a woman in a kimono is standing next to a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000341916.jpg", "caption": "an air canada express plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000210848.jpg", "caption": "a baseball player swinging his bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014244.jpg", "caption": "a man doing a trick on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112562.jpg", "caption": "a large orange truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079802.jpg", "caption": "a bus depot with many buses parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000079824.jpg", "caption": "air france airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-214 airbus a320-", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407505.jpg", "caption": "a baseball player is about to swing at a pitch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014319.jpg", "caption": "a suitcase and a sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145411.jpg", "caption": "a blue sign with a road sign and a traffic sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000505864.jpg", "caption": "a man holding a teddy bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473099.jpg", "caption": "a toilet with a metal handrail and a sign on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309261.jpg", "caption": "a baseball player throwing a ball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112657.jpg", "caption": "a group of remote controls on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473125.jpg", "caption": "a man feeding a giraffe at a zoo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047151.jpg", "caption": "a street with a lighted intersection at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047164.jpg", "caption": "a man standing in front of a fruit stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000538687.jpg", "caption": "a cow walking down the street next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309322.jpg", "caption": "a phillips 66 sign hanging on the wall", "annotations": [{"polygon": [[280, 254], [274, 277], [266, 286], [213, 282], [202, 271], [210, 242], [223, 239], [273, 243]], "text": "66", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "66", "recog_valid": true, "glyph_recog_text": "66", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342130.jpg", "caption": "a desk with a laptop and a desktop computer", "annotations": [{"polygon": [[29, 160], [22, 136], [25, 131], [34, 129], [74, 130], [77, 158], [76, 165], [68, 173], [53, 174], [52, 166], [48, 161], [45, 159], [41, 159]], "text": "WORD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "M6g9", "recog_valid": false, "glyph_recog_text": "WORD", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473206.jpg", "caption": "a baseball pitcher in the middle of throwing a pitch", "annotations": [{"polygon": [[97, 21], [101, 88], [444, 88], [438, 38]], "text": "MEDICAL", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "MEDICAL", "recog_valid": true, "glyph_recog_text": "MEDICAL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014484.jpg", "caption": "two vans parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178337.jpg", "caption": "a motorcycle is on display at a convention", "annotations": [{"polygon": [[361, 408], [337, 392], [330, 376], [331, 364], [337, 358], [350, 361], [346, 369], [347, 376], [352, 388], [364, 398], [371, 402]], "text": "BUILDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cone", "recog_valid": false, "glyph_recog_text": "BUCBER", "glyph_recog_ld": 0.1666680555532407}, {"polygon": [[411, 349], [416, 343], [429, 347], [442, 351], [450, 356], [471, 368], [480, 378], [467, 379], [461, 374], [444, 361], [432, 356], [417, 351], [412, 350]], "text": "BIHOW", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HONSS", "recog_valid": false, "glyph_recog_text": "BIROW", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[294, 342], [284, 382], [299, 427], [392, 427], [358, 413], [330, 395], [321, 369], [332, 350]], "text": "BULDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "婴", "recog_valid": false, "glyph_recog_text": "BULbER", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[197, 374], [200, 387], [198, 401], [185, 418], [174, 428], [241, 427], [238, 373]], "text": "IMAT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "爱", "recog_valid": false, "glyph_recog_text": "MAT", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[167, 369], [175, 385], [173, 398], [158, 414], [174, 420], [188, 402], [193, 387], [190, 371]], "text": "ILDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "AJLDER", "recog_valid": false, "glyph_recog_text": "83011", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571554.jpg", "caption": "a group of people with skis and snowboards", "annotations": [{"polygon": [[424, 302], [430, 307], [411, 329], [405, 339], [400, 335], [410, 316]], "text": "SMITH", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "hIWE", "recog_valid": false, "glyph_recog_text": "SMITH", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211116.jpg", "caption": "a yellow truck with a crane on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014537.jpg", "caption": "a baseball player blowing bubbles", "annotations": [{"polygon": [[205, 195], [302, 175], [306, 226], [297, 217], [279, 221], [259, 233], [209, 228]], "text": "Dodger", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dedgeu", "recog_valid": false, "glyph_recog_text": "Dodger", "glyph_recog_ld": 0.6666672222212963}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014546.jpg", "caption": "a person in the water holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080090.jpg", "caption": "a red train traveling down a street near palm trees", "annotations": [{"polygon": [[43, 190], [83, 139], [88, 145], [49, 196]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CROSSING", "recog_valid": true, "glyph_recog_text": "eR2wm1国小", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[91, 186], [85, 192], [43, 149], [48, 143]], "text": "CROSSING", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "RAILROAI", "recog_valid": false, "glyph_recog_text": "LBDE", "glyph_recog_ld": 0.12500109374863277}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000243947.jpg", "caption": "a crowd of people walking down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473326.jpg", "caption": "a man riding a motorcycle on a road with hay bales", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211185.jpg", "caption": "a green double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276749.jpg", "caption": "a ski lift is in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211221.jpg", "caption": "two baseball players walking on a baseball field", "annotations": [{"polygon": [[348, 138], [351, 164], [405, 138], [403, 128], [385, 125], [351, 137]], "text": "Southern", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Southeru", "recog_valid": false, "glyph_recog_text": "Southern", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[161, 211], [167, 240], [209, 220], [207, 199], [161, 211]], "text": "South", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sout", "recog_valid": false, "glyph_recog_text": "South", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276766.jpg", "caption": "a bus parked at a bus stop with a stormy sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000440611.jpg", "caption": "a green and white bus driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506162.jpg", "caption": "a plate of food with carrots and meat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178491.jpg", "caption": "a train station with several trains parked on the tracks", "annotations": [{"polygon": [[470, 429], [471, 423], [473, 420], [470, 418], [463, 415], [460, 417], [444, 411], [344, 386], [340, 384], [337, 385], [336, 386], [338, 387], [340, 388], [342, 389], [449, 419], [459, 421], [458, 425]], "text": "CUPERLINER", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "eucEAenNEB", "recog_valid": false, "glyph_recog_text": "EUPERLINER", "glyph_recog_ld": 0.3000006999993}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276801.jpg", "caption": "a man and a woman are standing in a street with a goat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000080203.jpg", "caption": "a train at a station with a platform and a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244049.jpg", "caption": "a man riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211283.jpg", "caption": "texans wide receiver julio jones", "annotations": [{"polygon": [[225, 274], [261, 256], [245, 216], [210, 234]], "text": "12", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "12", "recog_valid": true, "glyph_recog_text": "心", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[122, 286], [168, 300], [202, 324], [200, 356], [182, 355], [130, 311], [122, 294]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145748.jpg", "caption": "a red and white biplane parked on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000112978.jpg", "caption": "a man in a suit and tie giving a presentation to a crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047451.jpg", "caption": "a blue parking meter with a sign that says back in line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000571747.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145767.jpg", "caption": "a hallway with two doors and a plant on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244074.jpg", "caption": "a street sign with a traffic light attached to it", "annotations": [{"polygon": [[145, 274], [143, 305], [199, 294], [195, 264]], "text": "Main", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Main", "recog_valid": true, "glyph_recog_text": "Main", "glyph_recog_ld": 1.0}, {"polygon": [[212, 259], [213, 291], [295, 273], [290, 242]], "text": "Street", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Street", "recog_valid": true, "glyph_recog_text": "Street", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014698.jpg", "caption": "a woman riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276849.jpg", "caption": "a bed with a canopy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407930.jpg", "caption": "a table with a cup of coffee and a phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539016.jpg", "caption": "a red door with graffiti on it and a sign that says no parking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539023.jpg", "caption": "people are standing under tents with produce", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342417.jpg", "caption": "a living room with couches and a staircase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000276894.jpg", "caption": "a man in a suit and tie standing next to a black limo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000407972.jpg", "caption": "a mcdonalds restaurant with a plane on top of it", "annotations": [{"polygon": [[261, 248], [261, 248], [278, 250], [298, 260], [361, 267], [371, 271], [373, 283], [334, 281], [289, 276], [283, 279], [255, 276]], "text": "McDonald's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "(Y cDonald's", "recog_valid": false, "glyph_recog_text": "McDonald's", "glyph_recog_ld": 0.7500002083331597}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309679.jpg", "caption": "a white keyboard and mouse on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342478.jpg", "caption": "a bunch of bananas hanging from a metal rack", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000014819.jpg", "caption": "a clock is sitting on a window sill next to a book", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113132.jpg", "caption": "a laptop computer with a picture of a person on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000178682.jpg", "caption": "two women sitting on a bench talking to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539158.jpg", "caption": "a bathroom with graffiti on the walls and a toilet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539174.jpg", "caption": "a large group of luggage in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342583.jpg", "caption": "a fire truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309817.jpg", "caption": "a red tie tied to a microphone stand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000145989.jpg", "caption": "an orange train on the tracks", "annotations": [{"polygon": [[1, 148], [0, 209], [48, 215], [49, 161], [26, 155]], "text": "F", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "中", "recog_valid": false, "glyph_recog_text": "w", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375380.jpg", "caption": "two men standing in front of a red building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113236.jpg", "caption": "a stop sign with a blue sky in the background", "annotations": [{"polygon": [[102, 193], [313, 202], [324, 292], [107, 296], [100, 194]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277088.jpg", "caption": "a street at night with a sign that says no parking", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506470.jpg", "caption": "a living room with a television, couch and a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539242.jpg", "caption": "a food truck parked in front of a building", "annotations": [{"polygon": [[198, 220], [208, 242], [314, 216], [301, 192], [220, 213]], "text": "Gastronomo ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Goslronono", "recog_valid": false, "glyph_recog_text": "Gastronomo", "glyph_recog_ld": 0.7000002999996999}, {"polygon": [[210, 239], [221, 264], [304, 244], [300, 214]], "text": "Vagabundo", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Vagabund", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000309880.jpg", "caption": "a bunch of stuffed animals in a store window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473723.jpg", "caption": "a white bird is walking on a railing near the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473733.jpg", "caption": "a man on a skateboard doing a trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000047774.jpg", "caption": "a man reading a newspaper on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506534.jpg", "caption": "a large airplane sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277165.jpg", "caption": "a blender filled with food on a counter", "annotations": [{"polygon": [[191, 380], [196, 386], [203, 389], [210, 394], [217, 398], [226, 400], [232, 403], [234, 416], [222, 413], [213, 410], [206, 407], [200, 401], [192, 396]], "text": "NUTRi", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "MUTR", "recog_valid": false, "glyph_recog_text": "NUTR", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000473776.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375473.jpg", "caption": "a black and white photo of a room full of furniture", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113328.jpg", "caption": "a man standing in front of a store with umbrellas hanging from the ceiling", "annotations": [{"polygon": [[147, 84], [156, 96], [171, 85], [179, 80], [188, 77], [198, 76], [213, 76], [224, 79], [236, 84], [244, 90], [251, 79], [244, 73], [235, 69], [222, 64], [209, 62], [197, 62], [184, 64], [172, 68], [160, 75]], "text": "UMBRELLAS", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "UMBRELLAS", "recog_valid": true, "glyph_recog_text": "UMBRELLAS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211632.jpg", "caption": "a mirror is on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277202.jpg", "caption": "a woman holding a glass of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572121.jpg", "caption": "a computer monitor, keyboard, and mouse on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572165.jpg", "caption": "a computer keyboard is shown in the dark", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342807.jpg", "caption": "a group of people sitting at a table", "annotations": [{"polygon": [[116, 122], [128, 109], [149, 104], [160, 96], [222, 94], [235, 85], [278, 84], [279, 109], [263, 112], [251, 155], [119, 158]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "5o", "recog_valid": false, "glyph_recog_text": "M", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572194.jpg", "caption": "a soccer player in red and white is kicking the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441152.jpg", "caption": "two pictures of cupcakes with flowers and a chocolate covered strawberry", "annotations": [{"polygon": [[464, 431], [462, 425], [461, 418], [462, 413], [464, 407], [470, 403], [474, 401], [478, 399], [485, 398], [490, 399], [495, 402], [499, 403], [501, 406], [504, 411], [505, 418], [500, 419], [499, 413], [496, 410], [492, 407], [485, 405], [480, 406], [478, 407], [475, 408], [470, 417], [472, 428]], "text": "hapatite.com", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "", "recog_valid": false, "glyph_recog_text": "NopaeLoet", "glyph_recog_ld": 1.1111098765503868e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277331.jpg", "caption": "a blue semi truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441180.jpg", "caption": "a baby is sitting in a bathtub with a toothbrush", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146274.jpg", "caption": "a menu is displayed on a wall in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211811.jpg", "caption": "a small airplane sitting in a building with a large window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375654.jpg", "caption": "a hallway with a pink wall and a cabinet with a lot of items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375659.jpg", "caption": "a tennis player is about to hit a ball", "annotations": [{"polygon": [[198, 118], [198, 118], [280, 99], [284, 116], [202, 134]], "text": "J.P.Morgan", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "J.PMorgan", "recog_valid": false, "glyph_recog_text": "J.P.Morgan", "glyph_recog_ld": 0.9000000999999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572283.jpg", "caption": "a bronze plaque with a man and a woman on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000375680.jpg", "caption": "a blue bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277389.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[84, 211], [84, 254], [161, 252], [186, 209]], "text": "EIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EIC", "recog_valid": true, "glyph_recog_text": "EIC", "glyph_recog_ld": 1.0}, {"polygon": [[253, 277], [279, 271], [301, 282], [291, 294], [284, 299], [268, 301], [260, 298]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Z", "recog_valid": false, "glyph_recog_text": "4", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[380, 230], [380, 258], [410, 261], [412, 232]], "text": "ON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DN", "recog_valid": false, "glyph_recog_text": "ON", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[302, 42], [302, 71], [372, 102], [376, 100], [376, 46]], "text": "94", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "", "recog_valid": false, "glyph_recog_text": "94", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506785.jpg", "caption": "two motorcycles racing on a track", "annotations": [{"polygon": [[-3, 109], [66, 96], [109, 98], [114, 142], [-1, 176]], "text": "Hertz", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "Hertz", "recog_valid": true, "glyph_recog_text": "Hertz", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000342963.jpg", "caption": "three people sitting on a couch", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015289.jpg", "caption": "a double decker bus driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000506808.jpg", "caption": "a laptop computer on a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015297.jpg", "caption": "a cat laying on a person's feet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277461.jpg", "caption": "a large airplane on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000113634.jpg", "caption": "australia - cattle - mccullough's farm - mccullough's farm - mccullough's farm - mccullough", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000539631.jpg", "caption": "president obama presents the president's award to the president", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277491.jpg", "caption": "a man and a woman playing frisbee", "annotations": [{"polygon": [[159, 206], [195, 235], [174, 243], [147, 207]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "T", "recog_valid": false, "glyph_recog_text": "4", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146420.jpg", "caption": "a man swinging a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015356.jpg", "caption": "a bus is parked at night on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000211969.jpg", "caption": "a large passenger jet taking off from an airport runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146454.jpg", "caption": "a person kiteboarding in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441379.jpg", "caption": "a group of sheep running in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277550.jpg", "caption": "a clock hanging from a ceiling in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572487.jpg", "caption": "a traffic light on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244835.jpg", "caption": "a group of people sitting at a table with laptops", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000244844.jpg", "caption": "a little girl eating a hot dog at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474268.jpg", "caption": "a group of people on the beach with surfboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441504.jpg", "caption": "three children sitting on the ground near a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015524.jpg", "caption": "a laptop computer sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474289.jpg", "caption": "a man swinging a tennis racket on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572611.jpg", "caption": "three men sitting at a table with beer and food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000277722.jpg", "caption": "a red fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343264.jpg", "caption": "a group of young boys playing soccer on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000310501.jpg", "caption": "a kitchen with a chalkboard on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441605.jpg", "caption": "a pile of carrots and oranges on a table", "annotations": [{"polygon": [[401, 97], [382, 117], [405, 135], [418, 113]], "text": "KP", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "长P", "recog_valid": false, "glyph_recog_text": "KP", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048394.jpg", "caption": "a stop sign and a car driving by at night", "annotations": [{"polygon": [[56, 142], [120, 152], [127, 122], [61, 105], [56, 141]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376084.jpg", "caption": "a yellow dump truck driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343322.jpg", "caption": "a blue car with surfboards on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507166.jpg", "caption": "a black and white photo of a man in a suit", "annotations": [{"polygon": [[294, 335], [303, 342], [311, 352], [315, 360], [317, 370], [329, 368], [325, 355], [320, 344], [311, 335], [301, 327]], "text": "STUDIOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S10016", "recog_valid": false, "glyph_recog_text": "BTUDNOS", "glyph_recog_ld": 1.4285693876736616e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474401.jpg", "caption": "a laptop computer on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212271.jpg", "caption": "a man sitting on a bench", "annotations": [{"polygon": [[370, 343], [411, 335], [414, 361], [372, 365]], "text": "rival", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "nval", "recog_valid": false, "glyph_recog_text": "rival", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[415, 338], [475, 329], [482, 352], [418, 361]], "text": "schools", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "schools", "recog_valid": true, "glyph_recog_text": "schools", "glyph_recog_ld": 1.0}, {"polygon": [[84, 48], [111, 50], [104, 109], [132, 89], [161, 107], [193, 93], [223, 39], [236, 55], [237, 86], [223, 111], [238, 130], [260, 106], [253, 146], [223, 166], [-3, 191]], "text": "dish,", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "1624", "recog_valid": false, "glyph_recog_text": "dish,", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048441.jpg", "caption": "a white truck with an orange stripe on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081222.jpg", "caption": "a man swinging a tennis racket", "annotations": [{"polygon": [[238, 284], [423, 288], [414, 348], [233, 342]], "text": "LEAF", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEAF", "recog_valid": true, "glyph_recog_text": "LEAF", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000310607.jpg", "caption": "a stop sign on a street corner with a car parked in front of it", "annotations": [{"polygon": [[222, 171], [322, 203], [329, 204], [327, 152], [223, 109]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572769.jpg", "caption": "an old photo of a hotel with horses and people in front", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000146786.jpg", "caption": "a man holding a stop sign in front of a construction site", "annotations": [{"polygon": [[179, 132], [182, 164], [242, 145], [242, 113]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000310633.jpg", "caption": "a birthday cake on a table", "annotations": [{"polygon": [[242, 178], [294, 201], [299, 217], [279, 221], [235, 196]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Happe", "recog_valid": false, "glyph_recog_text": "Happy", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[288, 195], [310, 221], [324, 202], [301, 172]], "text": "50", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "50", "recog_valid": true, "glyph_recog_text": "50", "glyph_recog_ld": 1.0}, {"polygon": [[388, 230], [388, 230], [431, 268], [453, 286], [474, 271], [424, 237], [408, 216]], "text": "HAPPY BIRTHDAY celebrate", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "国东版", "recog_valid": false, "glyph_recog_text": "ARP", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[374, 243], [432, 289], [447, 280], [391, 228]], "text": "Celebrate", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "CNEH", "recog_valid": false, "glyph_recog_text": "Celebrate", "glyph_recog_ld": 0.1111120987643347}, {"polygon": [[50, 213], [107, 217], [121, 180], [67, 174]], "text": "50", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "动", "recog_valid": false, "glyph_recog_text": "50", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[346, 340], [392, 398], [403, 394], [387, 360], [368, 335], [354, 337]], "text": "Celebrat", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Cleoia", "recog_valid": false, "glyph_recog_text": "Celebrat", "glyph_recog_ld": 0.5000006249992187}, {"polygon": [[353, 333], [402, 392], [458, 355], [444, 333], [420, 306]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "un", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[409, 214], [427, 240], [472, 273], [492, 255], [427, 200]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "配品产", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.9999959999239536e-06}, {"polygon": [[255, 211], [248, 229], [286, 245], [295, 231], [279, 221]], "text": "Jim", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tie", "recog_valid": false, "glyph_recog_text": "Jlm", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114027.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[126, 342], [125, 374], [353, 387], [352, 349]], "text": "Bilba rolak", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "B ilbar rolak", "recog_valid": false, "glyph_recog_text": "Bilba rolak", "glyph_recog_ld": 0.8461539644969504}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572786.jpg", "caption": "a yellow taxi cab and a yellow food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081294.jpg", "caption": "a man and woman in scout uniforms standing next to each other", "annotations": [{"polygon": [[349, 112], [356, 111], [358, 101], [365, 91], [373, 82], [382, 80], [389, 78], [402, 79], [414, 87], [419, 95], [427, 86], [414, 78], [403, 74], [387, 73], [372, 78], [361, 85], [352, 102]], "text": "TROOP 737", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "全", "recog_valid": false, "glyph_recog_text": "TROOP 737", "glyph_recog_ld": 0.1111120987643347}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000114077.jpg", "caption": "a large building with a clock tower on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000408995.jpg", "caption": "a cat on a person's leg", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000310693.jpg", "caption": "a large green truck parked on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343470.jpg", "caption": "a group of people skiing down a slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015816.jpg", "caption": "a person riding a motorcycle on a track", "annotations": [{"polygon": [[234, 193], [212, 231], [213, 233], [223, 237], [242, 199]], "text": "5", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Blau", "recog_valid": false, "glyph_recog_text": "5", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000310753.jpg", "caption": "a large clock on the side of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048616.jpg", "caption": "a woman and two children eating pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212479.jpg", "caption": "a fighter jet is sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048658.jpg", "caption": "people walking down a street in a city", "annotations": [{"polygon": [[330, 201], [370, 169], [373, 209], [332, 237]], "text": "Canon", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "tamon", "recog_valid": false, "glyph_recog_text": "Canon", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[0, 102], [61, 154], [59, 180], [-2, 135]], "text": "UJIFIUI", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "0OIRU", "recog_valid": false, "glyph_recog_text": "JJIFIUI", "glyph_recog_ld": 0.28571530612099116}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000572960.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[475, 319], [488, 312], [474, 290], [458, 298], [474, 320]], "text": "8", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "品", "recog_valid": false, "glyph_recog_text": "8", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000441891.jpg", "caption": "three giraffes standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000179753.jpg", "caption": "a group of people standing in front of an airplane", "annotations": [{"polygon": [[35, 197], [63, 199], [74, 200], [86, 210], [80, 222], [67, 217], [47, 219], [37, 225], [32, 232], [29, 226], [27, 219], [26, 213], [28, 204], [31, 200]], "text": "loudster", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Sfuhite", "recog_valid": false, "glyph_recog_text": "koudster", "glyph_recog_ld": 0.37500078124902336}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081461.jpg", "caption": "a fighter jet flying over a desert area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278101.jpg", "caption": "a group of people playing soccer in a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000015976.jpg", "caption": "a double decker bus is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245354.jpg", "caption": "a black and red train engine sitting on a platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147073.jpg", "caption": "a man walking past a wall with a toaster on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147091.jpg", "caption": "a man on a motorcycle in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048789.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[316, 198], [335, 182], [349, 172], [372, 163], [365, 146], [356, 149], [338, 159], [314, 175], [304, 188]], "text": "FIELDER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FIELDEI", "recog_valid": false, "glyph_recog_text": "FIELDER", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[321, 200], [336, 227], [364, 218], [382, 207], [383, 196], [368, 170], [363, 170], [324, 196]], "text": "28", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "28", "recog_valid": true, "glyph_recog_text": "28", "glyph_recog_ld": 1.0}, {"polygon": [[244, 351], [263, 344], [280, 342], [292, 342], [287, 370], [280, 372], [253, 373], [247, 372], [246, 360]], "text": "55", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6", "recog_valid": false, "glyph_recog_text": "55", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278177.jpg", "caption": "a red, blue and yellow train sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245415.jpg", "caption": "people walking on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081576.jpg", "caption": "a group of people standing around a table with food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147122.jpg", "caption": "a kitchen with white cabinets and a large tote bag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000474803.jpg", "caption": "a red motorcycle with a bike rack on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278214.jpg", "caption": "a man wearing a red shirt", "annotations": [{"polygon": [[63, 378], [64, 410], [189, 408], [189, 372]], "text": "ARIZON", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ARIZO", "recog_valid": false, "glyph_recog_text": "ARIZON", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000573184.jpg", "caption": "a military helicopter is taking off from an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540428.jpg", "caption": "a man and two boys posing with skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376590.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000311067.jpg", "caption": "a view of an airplane parked on a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278303.jpg", "caption": "a dog is petting a bird", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000048947.jpg", "caption": "a white jet sitting on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000343880.jpg", "caption": "a cat playing with a stuffed animal on a wooden floor", "annotations": [{"polygon": [[89, 440], [133, 437], [139, 512], [94, 511]], "text": "Wr", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "Wr", "recog_valid": true, "glyph_recog_text": "3", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409433.jpg", "caption": "three cows in the dark with a tag on their head", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212838.jpg", "caption": "a man is eating a piece of food in four different pictures", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147314.jpg", "caption": "a man wearing a hat with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000442232.jpg", "caption": "a man eating a large slice of pizza", "annotations": [{"polygon": [[449, 261], [440, 293], [454, 291], [472, 264]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "D", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000049017.jpg", "caption": "a dog laying on the floor next to an oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212859.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245631.jpg", "caption": "a parking meter in front of a body of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000507776.jpg", "caption": "a fire truck is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245638.jpg", "caption": "a yellow school bus parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540561.jpg", "caption": "a green truck with a yellow logo", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000573330.jpg", "caption": "a traffic light with a keep right sign on top", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000180119.jpg", "caption": "a stop sign and a traffic light on a street", "annotations": [{"polygon": [[114, 201], [113, 230], [221, 227], [223, 199]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147371.jpg", "caption": "a person kiteboarding in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000442332.jpg", "caption": "a street sign with a stop sign and a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000212974.jpg", "caption": "four pictures showing different types of cake, including a cake with orange slices", "annotations": [{"polygon": [[132, 318], [132, 318], [170, 293], [179, 300], [143, 330]], "text": "Fruit", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Fruit", "recog_valid": true, "glyph_recog_text": "Fruit", "glyph_recog_ld": 1.0}, {"polygon": [[132, 340], [153, 325], [156, 330], [173, 316], [171, 309], [177, 305], [179, 308], [196, 295], [204, 311], [183, 327], [180, 335], [172, 340], [170, 335], [143, 355]], "text": "Fruit Tingles", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Tingles", "recog_valid": false, "glyph_recog_text": "Fruit Tingles", "glyph_recog_ld": 0.5384618934908512}, {"polygon": [[29, 387], [52, 364], [84, 341], [113, 331], [124, 338], [130, 353], [129, 364], [112, 369], [95, 376], [55, 398], [42, 407]], "text": "SAVERS", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "SAVERS", "recog_valid": true, "glyph_recog_text": "SAVERS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344051.jpg", "caption": "a microwave oven sitting on top of a stove", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147444.jpg", "caption": "a man in a blue uniform is kicking a soccer ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000049143.jpg", "caption": "a man kneeling on the floor with a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000180220.jpg", "caption": "a stop sign on a railroad track near a train", "annotations": [{"polygon": [[151, 198], [151, 198], [224, 192], [224, 237], [153, 241]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213003.jpg", "caption": "a man sitting in a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000081942.jpg", "caption": "a red fire hydrant on a brick sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000442400.jpg", "caption": "a laptop sitting on a desk with a drink and a box", "annotations": [{"polygon": [[149, 61], [148, 93], [230, 89], [230, 57]], "text": "BRAVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "BRAVE", "recog_valid": true, "glyph_recog_text": "BRAVE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016418.jpg", "caption": "a singapore airlines airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213034.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409653.jpg", "caption": "a museum with a large airplane inside of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000245823.jpg", "caption": "a living room with a couch and a window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000376921.jpg", "caption": "a woman standing next to a moving truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016486.jpg", "caption": "an old black and white photo of a truck with a man standing next to it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213095.jpg", "caption": "a bus is driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000409706.jpg", "caption": "a woman is drawing on a piece of paper with a doughnut", "annotations": [{"polygon": [[501, 411], [473, 447], [468, 434], [496, 396]], "text": "smart", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "snat", "recog_valid": false, "glyph_recog_text": "5造大名", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213103.jpg", "caption": "a man taking a picture of a table", "annotations": [{"polygon": [[97, 75], [84, 41], [114, 28], [148, 15], [197, 12], [195, 54], [169, 51], [140, 55], [115, 66]], "text": "ZYWIEC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "OHIWYT", "recog_valid": false, "glyph_recog_text": "ZYWIEC", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213117.jpg", "caption": "a baseball player running to home plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000573572.jpg", "caption": "a yellow and black airplane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213124.jpg", "caption": "a blue bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475277.jpg", "caption": "a white horse with a blue nose and black eyes", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016547.jpg", "caption": "a batter swinging a bat at a baseball game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000540864.jpg", "caption": "a woman in white dress playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000016578.jpg", "caption": "a car is parked at an intersection in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082121.jpg", "caption": "a person jumping on skis in the air", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000573651.jpg", "caption": "a street sign is on the sidewalk in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344280.jpg", "caption": "a clock on a pole in a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278744.jpg", "caption": "a man in white playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475357.jpg", "caption": "a baseball player swinging his bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377090.jpg", "caption": "a traffic light hanging from a wire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000442654.jpg", "caption": "a stop sign with graffiti on it", "annotations": [{"polygon": [[189, 70], [246, 57], [243, 86], [186, 99], [180, 93], [181, 83], [184, 75]], "text": "ONE", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ONE", "recog_valid": true, "glyph_recog_text": "ONE", "glyph_recog_ld": 1.0}, {"polygon": [[261, 54], [262, 82], [321, 72], [322, 57], [334, 37]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}, {"polygon": [[192, 207], [351, 181], [360, 182], [366, 192], [367, 203], [362, 218], [353, 229], [341, 232], [333, 232], [329, 272], [177, 293], [169, 284], [167, 268], [175, 227], [179, 217], [185, 211]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[217, 313], [285, 310], [291, 316], [287, 335], [299, 350], [224, 358]], "text": "WAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAR", "recog_valid": true, "glyph_recog_text": "WAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246095.jpg", "caption": "a train is parked on a train track next to a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000311644.jpg", "caption": "a bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475485.jpg", "caption": "a chair sitting in front of a wall with several clocks on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541029.jpg", "caption": "a laptop computer sitting on a table next to a bottle of water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278907.jpg", "caption": "a sign that says stop wash your hands", "annotations": [{"polygon": [[190, 128], [326, 135], [340, 136], [347, 142], [348, 153], [344, 164], [336, 171], [321, 172], [320, 189], [319, 192], [312, 191], [279, 192], [234, 190], [194, 190], [194, 190], [178, 183], [176, 171], [175, 142], [179, 133]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[68, 200], [183, 203], [182, 238], [74, 237], [65, 200]], "text": "WASH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WASH", "recog_valid": true, "glyph_recog_text": "WASH", "glyph_recog_ld": 1.0}, {"polygon": [[196, 205], [198, 203], [296, 206], [303, 211], [303, 217], [301, 222], [298, 224], [301, 229], [303, 238], [303, 241], [262, 241], [233, 240], [203, 238], [203, 225]], "text": "YOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "YOUR", "recog_valid": true, "glyph_recog_text": "YOUR", "glyph_recog_ld": 1.0}, {"polygon": [[317, 208], [316, 239], [439, 243], [443, 237], [444, 232], [444, 219], [442, 209], [435, 207], [400, 208], [338, 207], [318, 205]], "text": "HANDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HANDS", "recog_valid": true, "glyph_recog_text": "HANDS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000082301.jpg", "caption": "two men sitting on a couch", "annotations": [{"polygon": [[340, 273], [373, 271], [388, 269], [390, 265], [397, 267], [399, 272], [400, 284], [389, 285], [371, 288], [343, 299]], "text": "LONGHORN'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A6UGHORNY", "recog_valid": false, "glyph_recog_text": "LOHGHORKRS", "glyph_recog_ld": 0.40000059999939996}, {"polygon": [[357, 316], [366, 317], [376, 320], [389, 322], [403, 319], [418, 313], [433, 303], [436, 305], [437, 320], [426, 322], [415, 328], [402, 332], [387, 335], [372, 337], [360, 338]], "text": "SALOON", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "RALOON", "recog_valid": false, "glyph_recog_text": "SALOON", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000147839.jpg", "caption": "a police truck parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246146.jpg", "caption": "a large jet airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000573829.jpg", "caption": "a banana with writing on it", "annotations": [{"polygon": [[99, 209], [104, 250], [189, 238], [240, 235], [304, 235], [351, 238], [360, 208]], "text": "PROPENSITY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PrOpENSIT!", "recog_valid": false, "glyph_recog_text": "PROPENSITY", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000180613.jpg", "caption": "three young men standing next to each other holding surfboards", "annotations": [{"polygon": [[376, 93], [398, 80], [413, 63], [420, 42], [438, 55], [443, 67], [429, 96], [406, 114], [382, 118]], "text": "WAVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "MAVE", "recog_valid": false, "glyph_recog_text": "WAVE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344456.jpg", "caption": "a black and white photo of a bench in a subway station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000278921.jpg", "caption": "people crossing the street in an asian city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377244.jpg", "caption": "three red and white planes flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475551.jpg", "caption": "a pizza with pepperoni, onions and olives on a pan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541094.jpg", "caption": "a street sign with two different signs on it", "annotations": [{"polygon": [[96, 138], [171, 159], [172, 143], [101, 129]], "text": "CALLEJON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CALLEION", "recog_valid": false, "glyph_recog_text": "CALLEJON", "glyph_recog_ld": 0.8750001562498047}, {"polygon": [[124, 210], [119, 223], [202, 241], [203, 229]], "text": "ALGODONES", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALGODONES", "recog_valid": true, "glyph_recog_text": "ALGOOONES", "glyph_recog_ld": 0.8888890123455419}, {"polygon": [[177, 397], [219, 378], [228, 312], [188, 330]], "text": "AV", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "4>", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[131, 212], [307, 250], [312, 196], [140, 152]], "text": "ALAMO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ALAHO", "recog_valid": false, "glyph_recog_text": "ALAMO", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[335, 265], [334, 285], [373, 267], [371, 246]], "text": "DENTISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DEs", "recog_valid": false, "glyph_recog_text": "DENTISTA", "glyph_recog_ld": 0.2500009374988281}, {"polygon": [[387, 217], [386, 236], [442, 250], [440, 230]], "text": "DENTISTA", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "DENTISTA", "recog_valid": true, "glyph_recog_text": "DENTISTA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377301.jpg", "caption": "a traffic light with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475626.jpg", "caption": "a stop sign with a no right turn sign on it", "annotations": [{"polygon": [[166, 103], [327, 92], [329, 155], [153, 160]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[213, 243], [255, 244], [256, 276], [212, 275]], "text": "NO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "NO", "recog_valid": true, "glyph_recog_text": "NO", "glyph_recog_ld": 1.0}, {"polygon": [[188, 289], [186, 322], [287, 324], [287, 290]], "text": "RIGHT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RIGHT", "recog_valid": true, "glyph_recog_text": "RIGHT", "glyph_recog_ld": 1.0}, {"polygon": [[183, 340], [284, 342], [283, 377], [184, 375]], "text": "TURN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TURN", "recog_valid": true, "glyph_recog_text": "TURN", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541208.jpg", "caption": "a boy sitting on a bench outside of a restaurant", "annotations": [{"polygon": [[250, 149], [251, 171], [295, 180], [299, 162]], "text": "GUMP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GUMP", "recog_valid": true, "glyph_recog_text": "GUMP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508449.jpg", "caption": "a man and woman holding a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475692.jpg", "caption": "a street sign with a qr code on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344629.jpg", "caption": "a large pizza on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000344644.jpg", "caption": "a black and white photo of a wedding ceremony on the beach", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000574025.jpg", "caption": "two boxes of donuts sitting on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000049738.jpg", "caption": "a red knitted mitten", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000311914.jpg", "caption": "a stop sign is shown on the side of a school bus", "annotations": [{"polygon": [[335, 201], [415, 205], [413, 315], [336, 297]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "复", "recog_valid": false, "glyph_recog_text": "0-0", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377467.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410245.jpg", "caption": "two children playing a video game on a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508567.jpg", "caption": "a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377513.jpg", "caption": "a man eating a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377545.jpg", "caption": "a dog wearing a hat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377572.jpg", "caption": "boats are docked at the edge of a harbor", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148205.jpg", "caption": "a train traveling down the tracks with a truck behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443136.jpg", "caption": "two men sitting at a table with a large pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475929.jpg", "caption": "a street sign on a pole", "annotations": [{"polygon": [[155, 51], [155, 51], [193, 53], [191, 90], [159, 89]], "text": "TO", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TO", "recog_valid": true, "glyph_recog_text": "TO", "glyph_recog_ld": 1.0}, {"polygon": [[226, 52], [226, 92], [338, 104], [338, 64]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}, {"polygon": [[169, 126], [203, 127], [201, 167], [177, 167]], "text": "Y", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": ">", "recog_valid": false, "glyph_recog_text": ">", "glyph_recog_ld": 1.0}, {"polygon": [[209, 129], [209, 167], [346, 178], [342, 138]], "text": "STREET", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STREET", "recog_valid": true, "glyph_recog_text": "STREET", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279324.jpg", "caption": "a woman is playing with a dog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148267.jpg", "caption": "a man playing a wii game in a room", "annotations": [{"polygon": [[368, 100], [447, 90], [449, 132], [369, 138]], "text": "Wii", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Wii", "recog_valid": true, "glyph_recog_text": "Wii", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246573.jpg", "caption": "a double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000574257.jpg", "caption": "an airplane wing with a view of the ocean and mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508725.jpg", "caption": "a red and white train on the tracks next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000181047.jpg", "caption": "a plate of food on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475974.jpg", "caption": "a man in a green shirt riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377672.jpg", "caption": "a small airplane on the ground in an open field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000181064.jpg", "caption": "a man standing next to a train at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017226.jpg", "caption": "four yellow airplanes flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148306.jpg", "caption": "a traffic light has texts and \"walk\"", "annotations": [{"polygon": [[196, 356], [332, 386], [327, 320], [190, 285]], "text": "WALK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WAK", "recog_valid": false, "glyph_recog_text": "WALK", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[191, 135], [306, 168], [309, 113], [191, 70]], "text": "DONT", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "上2", "recog_valid": false, "glyph_recog_text": "DONT", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[192, 209], [318, 246], [313, 184], [186, 144]], "text": "WALK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "WALK", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475988.jpg", "caption": "a red and white motorcycle parked in a tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541529.jpg", "caption": "three people posing for a picture with snowboards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000475999.jpg", "caption": "a woman is looking at a display of brownies", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000377709.jpg", "caption": "a white bus on a street at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246656.jpg", "caption": "a man on a beach catching a frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000312199.jpg", "caption": "a truck with a load of oranges on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148366.jpg", "caption": "a red brick building with green shutters and a scooter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000181149.jpg", "caption": "a large building with two towers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476061.jpg", "caption": "a white and yellow bus driving down a road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508836.jpg", "caption": "a train traveling through a lush green area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345009.jpg", "caption": "a small plane on a small boat in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213936.jpg", "caption": "an older woman looking at a clock with a picture of a clock on it", "annotations": [{"polygon": [[273, 318], [276, 209], [356, 211], [354, 328]], "text": "R", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "5", "recog_valid": false, "glyph_recog_text": "c", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000181168.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[496, 284], [481, 250], [376, 259], [377, 302]], "text": "CAME", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AWYT", "recog_valid": false, "glyph_recog_text": "CAME", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000312247.jpg", "caption": "a man sitting on a couch with a laptop computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246718.jpg", "caption": "a group of people sitting around a table with wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345024.jpg", "caption": "a clock on a mantle", "annotations": [{"polygon": [[205, 157], [181, 171], [197, 191], [215, 175]], "text": "VIII", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "ViI!", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000213988.jpg", "caption": "a wooden boardwalk with benches and a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000312292.jpg", "caption": "a parking meter on the sidewalk next to a parked car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050151.jpg", "caption": "a pair of scissors on a wooden door", "annotations": [{"polygon": [[199, 138], [270, 138], [271, 172], [201, 169]], "text": "PULL", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PULL", "recog_valid": true, "glyph_recog_text": "PULL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000508913.jpg", "caption": "a green truck driving down the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017399.jpg", "caption": "a wooden grandfather clock in a room with a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000279543.jpg", "caption": "egyptair plane at the airport", "annotations": [{"polygon": [[139, 268], [147, 271], [144, 276], [195, 278], [185, 301], [123, 300], [126, 286]], "text": "EGYPTAIR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EGVCTRIR", "recog_valid": false, "glyph_recog_text": "EGYPTAIR", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345084.jpg", "caption": "a store with a sign that says king kod", "annotations": [{"polygon": [[187, 148], [187, 132], [255, 107], [256, 127], [211, 142]], "text": "KING", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "KING", "recog_valid": true, "glyph_recog_text": "KING", "glyph_recog_ld": 1.0}, {"polygon": [[323, 109], [343, 86], [353, 98], [354, 121], [349, 142], [341, 153], [326, 165]], "text": "D", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "O", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}, {"polygon": [[269, 169], [270, 209], [315, 174], [310, 122]], "text": "K.O.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "tO", "recog_valid": false, "glyph_recog_text": "K.O.", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443397.jpg", "caption": "a banana, almond butter, and a blender", "annotations": [{"polygon": [[338, 46], [318, 40], [296, 43], [303, 83], [373, 70], [376, 70], [383, 79], [396, 78], [407, 70], [447, 71], [447, 29], [438, 29], [429, 29], [424, 41], [388, 40]], "text": "Stony", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "Stonyfi", "recog_valid": false, "glyph_recog_text": "Stony", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443427.jpg", "caption": "a man standing next to a red train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000312355.jpg", "caption": "a clock on a shelf next to books", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214055.jpg", "caption": "a large airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410667.jpg", "caption": "a train engine with people on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050222.jpg", "caption": "a motorcycle racer is riding on a track", "annotations": [{"polygon": [[241, 311], [147, 294], [151, 276], [244, 293]], "text": "SAMSUNG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SAMSUNG", "recog_valid": true, "glyph_recog_text": "SAMSUNG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345152.jpg", "caption": "a train is passing through a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148554.jpg", "caption": "a highway with a lot of cars on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050256.jpg", "caption": "a blue garbage truck driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246875.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509023.jpg", "caption": "a group of men standing around a pizza box", "annotations": [{"polygon": [[39, 295], [66, 314], [83, 297], [81, 294], [69, 272], [56, 281]], "text": "TE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "TE", "recog_valid": true, "glyph_recog_text": "TE", "glyph_recog_ld": 1.0}, {"polygon": [[58, 329], [75, 346], [79, 347], [95, 333], [104, 316], [106, 312], [87, 297], [59, 324]], "text": "OLB", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "OLB", "recog_valid": true, "glyph_recog_text": "OLB", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246927.jpg", "caption": "a man sitting at a table with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246932.jpg", "caption": "a man playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050340.jpg", "caption": "a female tennis player is hitting the ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000017578.jpg", "caption": "a man cooking on a grill at a food festival", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000410797.jpg", "caption": "a tree with a sign on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115911.jpg", "caption": "a baseball player is swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000378055.jpg", "caption": "a man wearing a white shirt", "annotations": [{"polygon": [[264, 201], [305, 184], [312, 199], [269, 216]], "text": "RALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RALLY", "recog_valid": true, "glyph_recog_text": "RALLY", "glyph_recog_ld": 1.0}, {"polygon": [[270, 223], [337, 198], [346, 223], [278, 243]], "text": "MED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MED", "recog_valid": true, "glyph_recog_text": "MED", "glyph_recog_ld": 1.0}, {"polygon": [[283, 249], [316, 236], [339, 227], [351, 223], [357, 235], [342, 249], [322, 257], [302, 262], [281, 269]], "text": "RESEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RESEA", "recog_valid": false, "glyph_recog_text": "RESEN", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000246988.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148703.jpg", "caption": "a street sign with a number of signs on it", "annotations": [{"polygon": [[277, 237], [266, 257], [266, 264], [276, 263], [275, 271], [281, 271], [282, 264], [285, 265], [285, 271], [292, 271], [298, 239]], "text": "47", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "与", "recog_valid": false, "glyph_recog_text": "Lt", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[223, 132], [224, 164], [307, 162], [306, 127]], "text": "TOUR", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOUR", "recog_valid": true, "glyph_recog_text": "TOUR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000541950.jpg", "caption": "a man sitting on a bench with a cat eating a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000115973.jpg", "caption": "a white and red train is on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443653.jpg", "caption": "a group of motorcycles parked next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476430.jpg", "caption": "a large jet airplane on the runway at an airport", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050463.jpg", "caption": "a bus driving down a street with a city in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116006.jpg", "caption": "a blue and white train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476465.jpg", "caption": "a truck driving down a narrow road in the mountains", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000574770.jpg", "caption": "a dog jumping over a ramp at an event", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247114.jpg", "caption": "a woman holding two hot dogs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050518.jpg", "caption": "a man sitting at a table eating a sandwich", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148828.jpg", "caption": "a dog wearing a life vest on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116074.jpg", "caption": "a man standing next to a motorcycle on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214413.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443790.jpg", "caption": "a sheep is standing next to a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050586.jpg", "caption": "yellowstone tour bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000574884.jpg", "caption": "a plane flying over a stage with a large crowd", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247217.jpg", "caption": "a gas station with a sign has texts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000574908.jpg", "caption": "a stuffed animal with a yellow shirt and yellow ribbon", "annotations": [{"polygon": [[211, 277], [200, 343], [246, 345], [247, 277], [212, 273]], "text": "1", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S", "recog_valid": false, "glyph_recog_text": "-", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083407.jpg", "caption": "a stop sign on a street corner", "annotations": [{"polygon": [[200, 191], [282, 195], [282, 173], [290, 172], [296, 162], [296, 152], [286, 138], [194, 125], [184, 143], [187, 158], [187, 180]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509390.jpg", "caption": "a woman walking on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214478.jpg", "caption": "a horse is tied to a carriage in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000148965.jpg", "caption": "a kitchen counter with a blender, coffee maker, and a blender", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116207.jpg", "caption": "the big ben clock tower in london", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214526.jpg", "caption": "a stand selling fresh lemonade and other food items", "annotations": [{"polygon": [[276, 191], [281, 223], [249, 232], [226, 236], [177, 245], [173, 245], [157, 247], [158, 222], [203, 211], [243, 201]], "text": "FRESH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "FRESH", "recog_valid": true, "glyph_recog_text": "FRESH", "glyph_recog_ld": 1.0}, {"polygon": [[212, 242], [292, 225], [333, 215], [335, 238], [283, 247], [250, 253], [211, 260]], "text": "LEMONADE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "LEMONADE", "recog_valid": true, "glyph_recog_text": "LEMONADE", "glyph_recog_ld": 1.0}, {"polygon": [[322, 145], [324, 161], [225, 189], [225, 177]], "text": "CHEESE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CHEESE", "recog_valid": true, "glyph_recog_text": "CHEESE", "glyph_recog_ld": 1.0}, {"polygon": [[127, 189], [129, 201], [190, 183], [187, 169]], "text": "POLISH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLISH", "recog_valid": true, "glyph_recog_text": "POLISH", "glyph_recog_ld": 1.0}, {"polygon": [[141, 164], [142, 179], [198, 161], [194, 143]], "text": "STEAK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STEAK", "recog_valid": true, "glyph_recog_text": "STEAK", "glyph_recog_ld": 1.0}, {"polygon": [[262, 116], [269, 135], [324, 114], [316, 94]], "text": "SAC", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "SAC", "recog_valid": true, "glyph_recog_text": "SAC", "glyph_recog_ld": 1.0}, {"polygon": [[195, 119], [201, 135], [317, 87], [288, 85], [271, 84]], "text": "CHICK", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CHICKL", "recog_valid": false, "glyph_recog_text": "CHICK", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[132, 148], [135, 164], [191, 140], [188, 122]], "text": "CAJEN", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CAJEN", "recog_valid": true, "glyph_recog_text": "CAJEN", "glyph_recog_ld": 1.0}, {"polygon": [[136, 108], [136, 125], [199, 96], [198, 84], [189, 85], [178, 87]], "text": "ITALIAN", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "ITALIAN", "recog_valid": true, "glyph_recog_text": "TALIAN", "glyph_recog_ld": 0.8571430612241983}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345602.jpg", "caption": "a chocolate cake with a heart shape on top", "annotations": [{"polygon": [[126, 356], [131, 363], [140, 371], [146, 379], [164, 384], [169, 390], [174, 397], [181, 397], [187, 398], [199, 401], [203, 401], [207, 404], [211, 409], [216, 414], [228, 409], [229, 418], [196, 419], [172, 425], [170, 428], [174, 431], [196, 428], [215, 427], [241, 423], [262, 415], [282, 402], [291, 394], [251, 385], [240, 379], [224, 382], [215, 371], [204, 368], [196, 364], [187, 361], [187, 357], [179, 355], [171, 351], [165, 345], [152, 338]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "FripdAy", "recog_valid": false, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[171, 346], [180, 355], [185, 355], [190, 357], [199, 350], [201, 359], [199, 365], [203, 369], [214, 361], [219, 363], [216, 372], [224, 374], [227, 367], [247, 368], [247, 369], [247, 372], [230, 376], [220, 376], [219, 378], [224, 382], [239, 381], [249, 378], [261, 373], [282, 365], [288, 359], [257, 356], [237, 349], [223, 341], [209, 333], [206, 329], [186, 328]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAPPy", "recog_valid": false, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000443909.jpg", "caption": "a group of men in uniform on a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476701.jpg", "caption": "a billboard advertising the dios church in mexico", "annotations": [{"polygon": [[273, 181], [270, 213], [350, 214], [350, 184]], "text": "DIOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "DIOS", "recog_valid": true, "glyph_recog_text": "DIOS", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345635.jpg", "caption": "a giraffe eating a leaf", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509483.jpg", "caption": "a line of food trucks parked in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345648.jpg", "caption": "a train on a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345658.jpg", "caption": "a stop sign with a street sign on top of it", "annotations": [{"polygon": [[90, 267], [90, 274], [93, 279], [96, 283], [100, 286], [107, 288], [225, 292], [225, 267], [235, 267], [240, 266], [245, 263], [249, 258], [250, 253], [249, 249], [249, 245], [245, 239], [240, 236], [236, 234], [108, 225], [103, 227], [100, 231], [96, 236], [94, 242], [92, 265]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149086.jpg", "caption": "a man playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116326.jpg", "caption": "a group of people sitting at a table in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149095.jpg", "caption": "a man riding a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000345719.jpg", "caption": "a motorcycle parked on the side of a mountain road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476791.jpg", "caption": "a red motorcycle parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509564.jpg", "caption": "a group of people on the back of a truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411263.jpg", "caption": "a person holding up a sheep in front of a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280224.jpg", "caption": "a large pink bus with a flower design on it", "annotations": [{"polygon": [[117, 131], [117, 149], [155, 146], [203, 134], [203, 118]], "text": "Sunnybrook", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Sunnybrook", "recog_valid": true, "glyph_recog_text": "Sunnybrook", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116396.jpg", "caption": "two trains parked at a train station with a platform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050868.jpg", "caption": "a man in uniform standing next to a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509620.jpg", "caption": "a red bus parked at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214729.jpg", "caption": "a bus driving down a street with a building in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411341.jpg", "caption": "a man on a skateboard", "annotations": [{"polygon": [[54, 330], [78, 287], [153, 363], [133, 397]], "text": "BRETT", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "BRETT", "recog_valid": true, "glyph_recog_text": "BRETT", "glyph_recog_ld": 1.0}, {"polygon": [[28, 348], [34, 319], [46, 329], [68, 338], [86, 349], [95, 360], [88, 384], [64, 383], [42, 371], [30, 363]], "text": "KKE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "KKE", "recog_valid": true, "glyph_recog_text": "KKE", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116444.jpg", "caption": "a pizza with broccoli and tomatoes on a pan", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050912.jpg", "caption": "a cat laying on a couch in a room", "annotations": [{"polygon": [[340, 345], [372, 345], [352, 315], [328, 314]], "text": "L", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000050913.jpg", "caption": "a group of surfboards are lined up on a beach", "annotations": [{"polygon": [[324, 317], [348, 327], [344, 350], [317, 340], [316, 325]], "text": "KID", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "D", "recog_valid": false, "glyph_recog_text": "KID", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214766.jpg", "caption": "a stainless steel stove top oven with a digital display", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542449.jpg", "caption": "a group of people playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411385.jpg", "caption": "a clock with a frog on top of it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509702.jpg", "caption": "a table with teddy bears and food on it", "annotations": [{"polygon": [[325, 357], [324, 375], [356, 389], [370, 376], [350, 374], [333, 367], [330, 360]], "text": "ONEY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "JONEY", "recog_valid": false, "glyph_recog_text": "ONEY", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575260.jpg", "caption": "a bus driving down a street with trees in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000476967.jpg", "caption": "a clock on a pole", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542506.jpg", "caption": "a woman flying a kite in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509740.jpg", "caption": "a woman is standing in front of a counter with a bunch of cakes", "annotations": [{"polygon": [[294, 207], [367, 216], [372, 169], [340, 157], [284, 158], [277, 171], [279, 197]], "text": "Stinchley", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "skuhhey", "recog_valid": false, "glyph_recog_text": "Stinchley", "glyph_recog_ld": 0.333334074073251}, {"polygon": [[381, 217], [362, 190], [377, 176], [432, 134], [442, 139], [449, 149]], "text": "Stores", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Slores", "recog_valid": false, "glyph_recog_text": "Stores", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575280.jpg", "caption": "a man and a boy standing on a beach with surf boards", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411441.jpg", "caption": "a woman standing at a bus stop next to a blue bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000083769.jpg", "caption": "a fighter jet flying in the sky with a trail behind it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280396.jpg", "caption": "a black and white photo of two horses pulling a carriage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051021.jpg", "caption": "a computer desk with two monitors and a computer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000214892.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542573.jpg", "caption": "a woman in a bikini is leaning against a stop sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116607.jpg", "caption": "a truck is overturned by a crane on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149388.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000247700.jpg", "caption": "a busy street with many signs and signs", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116652.jpg", "caption": "a white pickup truck parked in front of a house", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575421.jpg", "caption": "three pizzas in boxes on a table", "annotations": [{"polygon": [[27, 295], [110, 270], [105, 283], [22, 305], [27, 295]], "text": "FRESH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "FRESH", "recog_valid": true, "glyph_recog_text": "专男区子件", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000509914.jpg", "caption": "a man on a motorcycle in front of a building", "annotations": [{"polygon": [[280, 170], [271, 202], [306, 200], [291, 168]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "A", "recog_valid": true, "glyph_recog_text": "A", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411629.jpg", "caption": "a man in white playing tennis", "annotations": [{"polygon": [[75, 209], [88, 246], [136, 249], [139, 250], [142, 250], [142, 208], [75, 206]], "text": "W&S", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "WWO", "recog_valid": false, "glyph_recog_text": "Weamp.s", "glyph_recog_ld": 0.14285836734518942}, {"polygon": [[304, 231], [302, 248], [303, 257], [367, 256], [367, 265], [375, 265], [375, 260], [382, 256], [380, 241], [326, 232]], "text": "Group", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Group", "recog_valid": true, "glyph_recog_text": "Group", "glyph_recog_ld": 1.0}, {"polygon": [[53, 235], [53, 272], [335, 285], [353, 273], [353, 251]], "text": "Financial", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "Financial~", "glyph_recog_ld": 9.99998999939855e-07}, {"polygon": [[49, 195], [56, 222], [420, 234], [422, 208]], "text": "Western & Southern", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "bwe we nn & Southerr", "recog_valid": false, "glyph_recog_text": "Western & Southern", "glyph_recog_ld": 0.5000002272726239}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444415.jpg", "caption": "a red scooter parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444418.jpg", "caption": "a group of people standing in a line", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444448.jpg", "caption": "a woman sitting on the sidewalk with a backpack and a toy", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116783.jpg", "caption": "a fire truck is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510004.jpg", "caption": "a glass table with flowers and vases on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000280643.jpg", "caption": "a delta airplane parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575574.jpg", "caption": "a group of people playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313449.jpg", "caption": "a large clock on a pole in the middle of a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084089.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000542853.jpg", "caption": "a car is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379034.jpg", "caption": "a group of people eating food in a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379035.jpg", "caption": "a large green and white airplane taking off from the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116894.jpg", "caption": "a blurry stop sign on a road with a train", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346270.jpg", "caption": "two women standing next to each other with a plate of food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575666.jpg", "caption": "a black bear walking across a paved road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116941.jpg", "caption": "a large airplane sitting on top of a runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000411862.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000116963.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "annotations": [{"polygon": [[109, 223], [110, 267], [403, 262], [398, 222]], "text": "SILVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QJISILVER", "recog_valid": false, "glyph_recog_text": "SILVER", "glyph_recog_ld": 0.6666670370366254}, {"polygon": [[175, 295], [291, 293], [293, 330], [178, 334]], "text": "TONY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TONY", "recog_valid": true, "glyph_recog_text": "TONY", "glyph_recog_ld": 1.0}, {"polygon": [[294, 293], [411, 292], [414, 327], [296, 329]], "text": "HAWK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAWK", "recog_valid": true, "glyph_recog_text": "HAWK", "glyph_recog_ld": 1.0}, {"polygon": [[180, 345], [419, 331], [437, 421], [192, 426]], "text": "RIDE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RATE", "recog_valid": false, "glyph_recog_text": "RIDE", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346344.jpg", "caption": "a black box with a face on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149739.jpg", "caption": "a baseball player is celebrating after a home run", "annotations": [{"polygon": [[118, 297], [118, 297], [144, 276], [168, 266], [171, 269], [183, 301], [141, 328], [123, 336], [117, 317]], "text": "RED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "RED", "recog_valid": true, "glyph_recog_text": "RED", "glyph_recog_ld": 1.0}, {"polygon": [[191, 266], [199, 256], [221, 258], [229, 267], [222, 293], [199, 298], [190, 295]], "text": "SU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "so", "recog_valid": false, "glyph_recog_text": "s", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[418, 167], [418, 167], [459, 168], [465, 180], [462, 195], [452, 207], [430, 207], [406, 201], [410, 188]], "text": "24", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "24", "recog_valid": true, "glyph_recog_text": "24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149755.jpg", "caption": "a child dressed as a lion riding a motorcycle", "annotations": [{"polygon": [[136, 297], [214, 270], [213, 254], [132, 280]], "text": "ASTRO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CRS", "recog_valid": false, "glyph_recog_text": "ASTRO", "glyph_recog_ld": 0.2000015999967999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000575756.jpg", "caption": "a group of people standing on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149774.jpg", "caption": "a woman holding a tennis racket", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018702.jpg", "caption": "a black and white photo of an airplane parked on the ground", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117009.jpg", "caption": "a pan with a pizza in it with black beans and cheese", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510235.jpg", "caption": "a cat laying on a rug next to a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051487.jpg", "caption": "a motorcycle with a christmas decoration on the back", "annotations": [{"polygon": [[147, 328], [225, 333], [225, 333], [297, 307], [305, 348], [305, 348], [225, 361], [148, 368]], "text": "WHE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "WHEN", "recog_valid": false, "glyph_recog_text": "WHE", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051493.jpg", "caption": "a man holding a hot dog in front of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379177.jpg", "caption": "a series of pictures of food and drinks", "annotations": [{"polygon": [[37, 371], [147, 418], [140, 430], [28, 381]], "text": "CARAMEL", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "CARAMEL", "recog_valid": true, "glyph_recog_text": "CARAMEL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051501.jpg", "caption": "a woman in orange robes walking down a street with an umbrella", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543027.jpg", "caption": "a woman walking by a bike", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149833.jpg", "caption": "a train is pulling into a station with a person waiting", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510290.jpg", "caption": "a busy street with people walking and standing around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215380.jpg", "caption": "a young boy pitching a baseball on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000149849.jpg", "caption": "a man is sitting on a bench next to a cart with a mattress", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444783.jpg", "caption": "a bus stop sign on a pole in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510320.jpg", "caption": "a cluttered office with a desk and a chair", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379261.jpg", "caption": "a group of people standing around a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000444801.jpg", "caption": "a man playing tennis", "annotations": [{"polygon": [[383, 177], [403, 160], [425, 148], [426, 179], [401, 196]], "text": "HI", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "HI", "recog_valid": true, "glyph_recog_text": "HI", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248194.jpg", "caption": "a man in a tan coat standing next to a woman", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248203.jpg", "caption": "a baseball player figurine hanging from a christmas tree", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346521.jpg", "caption": "a baseball player and a woman standing on a field", "annotations": [{"polygon": [[394, 300], [386, 310], [389, 340], [423, 338], [429, 300]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "29", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 0.5000024999875001}, {"polygon": [[291, 259], [396, 236], [426, 247], [433, 272], [423, 279], [314, 329], [296, 322], [287, 310], [280, 286], [283, 272]], "text": "Orioles", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Oeiole", "recog_valid": false, "glyph_recog_text": "Orioles", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[51, 364], [36, 384], [39, 419], [49, 423], [51, 430], [63, 434], [77, 435], [91, 425], [101, 416], [108, 410], [111, 407], [96, 357]], "text": "Ori", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "od", "recog_valid": false, "glyph_recog_text": "o", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510365.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084383.jpg", "caption": "a man eating a hot dog", "annotations": [{"polygon": [[255, 353], [244, 390], [323, 409], [331, 370], [255, 352]], "text": "36", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "39", "recog_valid": false, "glyph_recog_text": "3 6", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248221.jpg", "caption": "a bus is stopped at a bus stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379304.jpg", "caption": "a teddy bear sitting on a bed in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346550.jpg", "caption": "a woman sitting on the floor with her laptop", "annotations": [{"polygon": [[84, 435], [431, 438], [427, 457], [421, 468], [435, 491], [99, 491], [97, 502], [83, 505]], "text": "punchstock", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "punchstock", "recog_valid": true, "glyph_recog_text": "punchstock", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018873.jpg", "caption": "thomas the train birthday cake", "annotations": [{"polygon": [[302, 225], [307, 279], [437, 271], [431, 233]], "text": "Samuel", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Samual", "recog_valid": false, "glyph_recog_text": "Samuel", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[123, 129], [122, 171], [245, 176], [242, 134]], "text": "Happy", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Happy", "recog_valid": true, "glyph_recog_text": "Happy", "glyph_recog_ld": 1.0}, {"polygon": [[257, 131], [256, 165], [294, 166], [290, 132]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "2", "recog_valid": true, "glyph_recog_text": "2", "glyph_recog_ld": 1.0}, {"polygon": [[256, 166], [261, 231], [455, 239], [444, 170]], "text": "Birrhday", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Binthdoy", "recog_valid": false, "glyph_recog_text": "Birrhday", "glyph_recog_ld": 0.6250004687494141}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313786.jpg", "caption": "a train is parked next to a cruise ship", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117178.jpg", "caption": "a group of people on motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182746.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000313825.jpg", "caption": "a skateboarder is doing a trick on a rail", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182782.jpg", "caption": "a large airplane sitting in a museum", "annotations": [{"polygon": [[251, 204], [300, 222], [298, 269], [248, 263]], "text": "46", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "等", "recog_valid": false, "glyph_recog_text": "寸", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051717.jpg", "caption": "a truck is parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018952.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215564.jpg", "caption": "a man in a cowboy hat holding a hot dog", "annotations": [{"polygon": [[212, 353], [244, 344], [251, 380], [219, 388]], "text": "Ed", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "eL", "recog_valid": false, "glyph_recog_text": "g", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[247, 343], [252, 377], [363, 340], [216, 388], [242, 421], [349, 373], [377, 353], [381, 340], [372, 322], [359, 323], [354, 314], [326, 322], [297, 332], [293, 327], [283, 332]], "text": "Ed Delevic's", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "g Sdeuit,", "recog_valid": false, "glyph_recog_text": "ed Delevic's", "glyph_recog_ld": 0.25000062499947917}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379434.jpg", "caption": "a man playing a game of beer pong", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000018994.jpg", "caption": "a person wearing a bear costume standing in front of a bar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510525.jpg", "caption": "a stop sign and a street sign on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510531.jpg", "caption": "two women sitting at a table with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000117322.jpg", "caption": "a train pulling into a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182860.jpg", "caption": "winds of change - energy as one of the key drivers of climate change", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215631.jpg", "caption": "a woman holding a remote control in her hand", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084561.jpg", "caption": "a black and white photo of a dog and a man in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150126.jpg", "caption": "a man is holding an elephant by the trunk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379508.jpg", "caption": "a young man sitting at a desk with a computer", "annotations": [{"polygon": [[104, 240], [19, 265], [25, 277], [106, 250]], "text": "CHAINTECH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HDEINIVHD", "recog_valid": false, "glyph_recog_text": "CHAINTECH", "glyph_recog_ld": 0.22222308641879285}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182902.jpg", "caption": "a little girl sitting at a table with a cupcake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477831.jpg", "caption": "a fire hydrant sitting in the grass next to a gas station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248464.jpg", "caption": "a street sign with a blue sign that says maryland lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000346785.jpg", "caption": "a street sign is covered in snow and traffic lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000477858.jpg", "caption": "two motorcycles are racing down a track", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510626.jpg", "caption": "a man holding a tennis racket and a ball", "annotations": [{"polygon": [[73, 303], [85, 268], [128, 297], [135, 343]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "", "recog_valid": false, "glyph_recog_text": "P", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000182974.jpg", "caption": "a small table with a small lamp on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281302.jpg", "caption": "a motorcycle on display at a show with people around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051943.jpg", "caption": "a group of people standing around a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215803.jpg", "caption": "a yellow and blue fire hydrant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000051965.jpg", "caption": "a bus is driving down the street", "annotations": [{"polygon": [[442, 91], [444, 106], [493, 80], [491, 63]], "text": "VANCOUVER", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "TNOUER", "recog_valid": false, "glyph_recog_text": "VANCOUVER", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019202.jpg", "caption": "a street sign with a no parking sign and a right turn arrow", "annotations": [{"polygon": [[366, 325], [367, 252], [323, 252], [320, 324]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "0", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183068.jpg", "caption": "a group of people standing on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248610.jpg", "caption": "a woman stirring a pot of soup in the kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478006.jpg", "caption": "a street sign leaning on a rock in a city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445249.jpg", "caption": "a tennis match is being played on a clay court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000084819.jpg", "caption": "a pile of wood and other items in a field", "annotations": [{"polygon": [[70, 339], [82, 376], [101, 376], [90, 340]], "text": "PIPER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "03dld", "recog_valid": false, "glyph_recog_text": "PIPER", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000215907.jpg", "caption": "a woman sitting on a couch in front of a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000379749.jpg", "caption": "a large blue and white airplane on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183151.jpg", "caption": "a traffic light is next to a big ben clock tower", "annotations": [{"polygon": [[69, 455], [65, 484], [102, 480], [103, 454]], "text": "RO", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "DA", "recog_valid": false, "glyph_recog_text": "RO", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543622.jpg", "caption": "a fire truck driving down a street with traffic", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183202.jpg", "caption": "a cat laying in a suitcase", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478115.jpg", "caption": "a group of children sitting on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576421.jpg", "caption": "an apple computer, keyboard, cd, and other items on a desk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183212.jpg", "caption": "a woman sitting at a table with a pizza and a glass of beer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576431.jpg", "caption": "a group of people on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576445.jpg", "caption": "a shirtless man sitting on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183233.jpg", "caption": "a man standing next to a stop sign", "annotations": [{"polygon": [[31, 196], [130, 217], [143, 178], [38, 149]], "text": "stop", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STOP", "recog_valid": false, "glyph_recog_text": "stop", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000543689.jpg", "caption": "people walking down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000248779.jpg", "caption": "a large truck parked next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019413.jpg", "caption": "a bus on the street next to a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183256.jpg", "caption": "a man wearing a red hat sitting on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412664.jpg", "caption": "a window with blinds", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000510997.jpg", "caption": "a stop sign and street sign on a dirt road", "annotations": [{"polygon": [[309, 184], [309, 215], [362, 215], [362, 186]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216091.jpg", "caption": "a kitchen with a refrigerator, microwave and other items", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150559.jpg", "caption": "a man riding a bike down a city street", "annotations": [{"polygon": [[316, 55], [348, 51], [348, 81], [316, 83]], "text": "SWEAF", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "G", "recog_valid": false, "glyph_recog_text": "SNEAF", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347168.jpg", "caption": "a boy running with a kite", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314425.jpg", "caption": "a small airplane parked on the tarmac", "annotations": [{"polygon": [[74, 232], [66, 214], [108, 202], [114, 223]], "text": "Skylane", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Sflne", "recog_valid": false, "glyph_recog_text": "Skylane", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000445500.jpg", "caption": "a family poses for a photo in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183401.jpg", "caption": "girls soccer game at the high school", "annotations": [{"polygon": [[390, 378], [411, 380], [427, 366], [432, 372], [430, 376], [426, 373], [414, 385], [488, 386], [501, 371], [503, 371], [503, 371], [505, 376], [498, 381], [493, 387], [505, 391], [484, 419], [457, 419], [420, 419], [422, 401], [386, 401]], "text": "Photography", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "Photiogogpgg", "recog_valid": false, "glyph_recog_text": "Photography", "glyph_recog_ld": 0.5833336805552662}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576625.jpg", "caption": "a person riding a wave on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150641.jpg", "caption": "a blue cargo container with a door open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150646.jpg", "caption": "a man sitting at a bar with a television on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052348.jpg", "caption": "a young girl riding a skateboard in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216191.jpg", "caption": "a street with a car driving down it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052357.jpg", "caption": "a white wii console", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412824.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000150691.jpg", "caption": "a cat is holding a wii remote", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511146.jpg", "caption": "an american airlines jetliner parked on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000412855.jpg", "caption": "a street with a lot of traffic lights and street lights", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347359.jpg", "caption": "a man on a skateboard doing a trick on a ramp", "annotations": [{"polygon": [[36, 177], [214, 194], [212, 225], [32, 204]], "text": "Brooklyn", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Brooklyn .", "recog_valid": false, "glyph_recog_text": "Brooklyn", "glyph_recog_ld": 0.8000001999998}, {"polygon": [[214, 270], [234, 278], [251, 284], [268, 290], [265, 300], [258, 299], [247, 297], [230, 291], [214, 285], [209, 283]], "text": "BOBU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BO8U", "recog_valid": false, "glyph_recog_text": "BOBU", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478445.jpg", "caption": "a tennis player on a blue court", "annotations": [{"polygon": [[492, 130], [514, 141], [219, 168], [201, 154]], "text": "BRISBANE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ENVASIUA", "recog_valid": false, "glyph_recog_text": "BRISBANE", "glyph_recog_ld": 1.249998437424793e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576771.jpg", "caption": "a table with food and drinks on it", "annotations": [{"polygon": [[259, 142], [238, 161], [210, 213], [239, 231], [253, 181], [269, 160], [276, 149]], "text": "Dorito", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Dorito", "recog_valid": true, "glyph_recog_text": "Dorito", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576799.jpg", "caption": "a pizza with onions and cheese on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052523.jpg", "caption": "a red bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314672.jpg", "caption": "a street sign is in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019766.jpg", "caption": "a man riding a skateboard on a concrete ledge", "annotations": [{"polygon": [[335, 325], [437, 321], [484, 385], [355, 391]], "text": "BiS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "R", "recog_valid": false, "glyph_recog_text": "BiS", "glyph_recog_ld": 3.3333222222431402e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118081.jpg", "caption": "a car driving down the road with a traffic light", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478532.jpg", "caption": "a man walking down the street in front of a restaurant", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380239.jpg", "caption": "a desk with two computer monitors and a keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000314758.jpg", "caption": "a group of men playing frisbee", "annotations": [{"polygon": [[165, 237], [174, 268], [196, 262], [193, 239], [164, 237]], "text": "C", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "C", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000281997.jpg", "caption": "a group of people walking up stairs to an airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118171.jpg", "caption": "two men playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478636.jpg", "caption": "a woman is standing next to a horse in a trailer", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380337.jpg", "caption": "a green double decker bus parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118199.jpg", "caption": "a cubicle with a computer, a fan, and a television", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216507.jpg", "caption": "a tv with a star wars movie playing on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413115.jpg", "caption": "a street with a traffic light and a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000576956.jpg", "caption": "a train station with a clock on the wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118219.jpg", "caption": "a slice of pizza on a paper plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019918.jpg", "caption": "a man doing a skateboard trick", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052693.jpg", "caption": "two buses parked next to each other on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000019927.jpg", "caption": "a guinness time sign on a building", "annotations": [{"polygon": [[182, 410], [321, 404], [319, 377], [180, 385]], "text": "GUNNESS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "GUINNESS", "recog_valid": false, "glyph_recog_text": "GUNNESS", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000183771.jpg", "caption": "a street corner with a red door and a phone booth", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511480.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544264.jpg", "caption": "a man and a woman are playing frisbee in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380429.jpg", "caption": "a man and a little girl cutting apples", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000380453.jpg", "caption": "a group of men standing around a pile of bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020030.jpg", "caption": "a display of various items", "annotations": [{"polygon": [[384, 224], [373, 218], [373, 204], [383, 197], [389, 184], [399, 178], [401, 185], [402, 198], [405, 193], [456, 187], [466, 206], [458, 208], [440, 210], [429, 211], [417, 214], [402, 220], [392, 225]], "text": "STUDS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "OTUDS", "recog_valid": false, "glyph_recog_text": "STUDS", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[380, 435], [394, 412], [419, 400], [424, 400], [426, 412], [414, 416], [403, 426], [392, 439]], "text": "TOOTH", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "CUOTH", "recog_valid": false, "glyph_recog_text": "TOOTH", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[383, 225], [383, 225], [391, 253], [457, 244], [449, 227], [440, 222]], "text": "LINK", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "IIN", "recog_valid": false, "glyph_recog_text": "LINK", "glyph_recog_ld": 0.5000012499968749}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347715.jpg", "caption": "a row of parking meters on the side of a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020039.jpg", "caption": "a blue truck with a hood open", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347736.jpg", "caption": "a man standing on a stool with two elephants", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000478809.jpg", "caption": "a man is throwing a frisbee in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249441.jpg", "caption": "a large white airplane sitting on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544386.jpg", "caption": "people are sitting at tables in front of a food truck", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282257.jpg", "caption": "a man riding a skateboard down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511639.jpg", "caption": "a man riding an elephant down a street", "annotations": [{"polygon": [[318, 251], [318, 268], [331, 274], [337, 277], [350, 276], [350, 282], [355, 283], [360, 272], [349, 263], [337, 264], [330, 261]], "text": "PNGE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Q", "recog_valid": false, "glyph_recog_text": "PNGE", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000052901.jpg", "caption": "three children sitting on a bed", "annotations": [{"polygon": [[415, 285], [406, 306], [466, 319], [478, 324], [490, 304], [474, 290]], "text": "Arehie", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Alchjp", "recog_valid": false, "glyph_recog_text": "Arehie", "glyph_recog_ld": 0.3333344444425925}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315045.jpg", "caption": "a train traveling down the tracks with mountains in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151211.jpg", "caption": "a dog sitting in the driver's seat of a car", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151214.jpg", "caption": "a cake with a man on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413360.jpg", "caption": "a skateboarder doing a trick in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577240.jpg", "caption": "a woman is walking down the street with a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151264.jpg", "caption": "a bullet train on the tracks", "annotations": [{"polygon": [[359, 49], [370, 45], [370, 57], [392, 47], [395, 69], [361, 85]], "text": "Kose", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "fase", "recog_valid": false, "glyph_recog_text": "5", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347878.jpg", "caption": "a fire truck with a hose", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216822.jpg", "caption": "a group of people standing in an office hallway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347948.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000347962.jpg", "caption": "a man and two children riding on the back of a motorcycle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511803.jpg", "caption": "a woman sitting at a table with a pizza and soda", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282444.jpg", "caption": "a man in a white chef's outfit holding a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000216921.jpg", "caption": "a red bus is parked on the side of a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479066.jpg", "caption": "a man sitting at a table with a bottle of wine", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020315.jpg", "caption": "a group of dogs on a bench", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000511843.jpg", "caption": "a man playing tennis on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577380.jpg", "caption": "a person on a snowboard standing on a large sign", "annotations": [{"polygon": [[316, 334], [436, 321], [428, 350], [415, 365], [394, 376], [376, 378], [356, 378], [340, 372], [325, 366], [312, 356]], "text": "A", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "160", "recog_valid": false, "glyph_recog_text": "A", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[337, 156], [345, 255], [383, 258], [374, 160]], "text": "BE HERE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ABE HERE", "recog_valid": false, "glyph_recog_text": "ow 工w", "glyph_recog_ld": 0.12500109374863277}, {"polygon": [[317, 293], [316, 334], [419, 326], [419, 290]], "text": "109", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "109", "recog_valid": true, "glyph_recog_text": "109", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544615.jpg", "caption": "a small child playing with a remote control", "annotations": [{"polygon": [[109, 341], [170, 313], [176, 319], [114, 346]], "text": "YIAORONA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MHOVOYIAEO", "recog_valid": false, "glyph_recog_text": "生/天办名服评中", "glyph_recog_ld": 9.99998999939855e-07}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315242.jpg", "caption": "a young boy sitting on the snow with a snowboard", "annotations": [{"polygon": [[207, 274], [387, 269], [407, 323], [219, 325]], "text": "burton", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "UoaJng", "recog_valid": false, "glyph_recog_text": "burton", "glyph_recog_ld": 1.6666638887885554e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053101.jpg", "caption": "a woman playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184176.jpg", "caption": "a cake shaped like a truck with a picture of a truck on it", "annotations": [{"polygon": [[194, 302], [196, 290], [229, 295], [247, 301], [263, 308], [289, 325], [295, 333], [280, 337], [264, 325], [239, 312], [207, 304]], "text": "BIRTHDAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BIRTHDAY", "recog_valid": true, "glyph_recog_text": "BIRTHDAY", "glyph_recog_ld": 1.0}, {"polygon": [[52, 328], [64, 338], [76, 328], [90, 320], [103, 313], [119, 309], [121, 295], [120, 292], [109, 294], [96, 299], [81, 304]], "text": "HAPPY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "HAPPY", "recog_valid": true, "glyph_recog_text": "HAPPY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479102.jpg", "caption": "a crocheted bear and cup with a donut next to it", "annotations": [{"polygon": [[94, 156], [104, 173], [214, 111], [203, 96]], "text": "CREEPETZ", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CREEPETZ", "recog_valid": true, "glyph_recog_text": "CREEPETZ", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348031.jpg", "caption": "a large clock in the middle of a park", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184204.jpg", "caption": "a stop sign with a arrow pointing to the right", "annotations": [{"polygon": [[210, 104], [202, 125], [201, 174], [203, 187], [269, 191], [313, 201], [327, 204], [342, 163], [340, 132]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[230, 198], [230, 224], [230, 226], [253, 227], [282, 228], [299, 229], [312, 231], [311, 205], [290, 203], [263, 202]], "text": "HAMMERTIME", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "HAHNERTIE", "recog_valid": false, "glyph_recog_text": "HAMMERTIME", "glyph_recog_ld": 0.7000002999996999}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282552.jpg", "caption": "a large building with a sign that says tea", "annotations": [{"polygon": [[109, 224], [186, 229], [200, 304], [113, 309], [107, 258]], "text": "TEA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TEA", "recog_valid": true, "glyph_recog_text": "TEA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544703.jpg", "caption": "a chair and a television in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020421.jpg", "caption": "a group of men playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151499.jpg", "caption": "a white bus driving down a street with trees", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249804.jpg", "caption": "a group of workers working on a street", "annotations": [{"polygon": [[174, 65], [175, 145], [213, 143], [216, 66]], "text": "GY", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "19>", "recog_valid": false, "glyph_recog_text": "0>", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544719.jpg", "caption": "a large pile of luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217040.jpg", "caption": "a woman holding a cell phone up to her face", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413668.jpg", "caption": "a person using a mouse and keyboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151534.jpg", "caption": "a black and white photo of a plane flying in the sky", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000118769.jpg", "caption": "a man and woman holding an umbrella and a picture of a man", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000249869.jpg", "caption": "a woman in a hat holding bananas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151594.jpg", "caption": "a man and woman walking down a city street with luggage", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282670.jpg", "caption": "a white bus with colorful designs on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348206.jpg", "caption": "a red train traveling down the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446512.jpg", "caption": "a dog drinking water from a bottle", "annotations": [{"polygon": [[89, 22], [85, 32], [119, 53], [122, 49], [113, 43], [112, 33]], "text": "Sn", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "", "recog_valid": false, "glyph_recog_text": "sn", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577586.jpg", "caption": "three people standing in front of a wall with a banner", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184389.jpg", "caption": "a baseball player is holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315462.jpg", "caption": "a man on a motorcycle with a crowd of people watching", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053339.jpg", "caption": "a man holding a wii remote", "annotations": [{"polygon": [[1, 255], [0, 298], [27, 370], [216, 349], [202, 233]], "text": "Wii.", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Wii", "recog_valid": false, "glyph_recog_text": "Wii.", "glyph_recog_ld": 0.7500006249984374}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413792.jpg", "caption": "a large crowd of people standing around a clock", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184431.jpg", "caption": "a train traveling down a track next to tall buildings", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000544893.jpg", "caption": "people are standing around a tent with bicycles", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000413847.jpg", "caption": "a monkey on a banana with a sign that says cash only", "annotations": [{"polygon": [[57, 329], [57, 386], [188, 369], [173, 315], [154, 315], [129, 314], [103, 321]], "text": "CASH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CASH", "recog_valid": true, "glyph_recog_text": "CASH", "glyph_recog_ld": 1.0}, {"polygon": [[58, 395], [57, 446], [66, 457], [96, 451], [181, 444], [192, 380], [129, 381], [115, 382]], "text": "ONLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "ONLY", "recog_valid": true, "glyph_recog_text": "ONLY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020651.jpg", "caption": "a group of girls posing for a picture with a pizza", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282803.jpg", "caption": "a group of people posing for a photo on a tennis court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086213.jpg", "caption": "a man and woman in traditional clothing standing in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348368.jpg", "caption": "a clock tower with a clock on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348370.jpg", "caption": "a group of people standing on a snow covered hill", "annotations": [{"polygon": [[269, 445], [277, 257], [317, 258], [307, 449]], "text": "CUSTOM", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CUSTON", "recog_valid": false, "glyph_recog_text": "UD0FOE", "glyph_recog_ld": 0.1666680555532407}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577749.jpg", "caption": "an old black and white photo of cows in a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119006.jpg", "caption": "a black and white photo of a small airplane", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000577762.jpg", "caption": "a desk with several computers and a large poster", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381154.jpg", "caption": "a family of people standing in front of a bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446694.jpg", "caption": "a sandwich and a piece of bread on a tray", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119047.jpg", "caption": "a blue and white jet sitting on the grass", "annotations": [{"polygon": [[402, 186], [424, 135], [443, 139], [428, 188]], "text": "4", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "N", "recog_valid": false, "glyph_recog_text": "7", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053514.jpg", "caption": "a large clock hanging from the ceiling of a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000020770.jpg", "caption": "a man sitting on a bench with a surfboard", "annotations": [{"polygon": [[43, 231], [62, 326], [88, 324], [70, 234]], "text": "UN", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "Nnse", "recog_valid": false, "glyph_recog_text": "U N", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446769.jpg", "caption": "a man and woman sitting on a bench in front of a store", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282943.jpg", "caption": "a woman laying on a bed", "annotations": [{"polygon": [[301, 420], [306, 455], [172, 479], [168, 440]], "text": "KATE", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CILV", "recog_valid": false, "glyph_recog_text": "KATE", "glyph_recog_ld": 2.4999937499048386e-06}, {"polygon": [[296, 384], [301, 418], [173, 438], [165, 400]], "text": "MOS", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "SOIN", "recog_valid": false, "glyph_recog_text": "MOS", "glyph_recog_ld": 0.25000187499531246}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000282949.jpg", "caption": "a red and white airplane on the tarmac", "annotations": [{"polygon": [[292, 242], [285, 269], [377, 275], [379, 245]], "text": "airberlin", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "airbertia", "recog_valid": false, "glyph_recog_text": "airberlin", "glyph_recog_ld": 0.7777780246910837}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381261.jpg", "caption": "a computer monitor with the january calendar on it", "annotations": [{"polygon": [[236, 220], [237, 252], [348, 253], [348, 221]], "text": "January", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "January", "recog_valid": true, "glyph_recog_text": "January", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250194.jpg", "caption": "people walking down the street in front of a large sign", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315732.jpg", "caption": "a man in a hat and sunglasses sitting on the side of a boat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000315734.jpg", "caption": "1957 chevrolet c10 cc-131209 for sale in oregon, california", "annotations": [{"polygon": [[401, 242], [407, 214], [437, 188], [447, 200], [432, 217], [422, 226], [404, 242]], "text": "GMX", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "EME", "recog_valid": false, "glyph_recog_text": "GMX", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119145.jpg", "caption": "a street sign with a green light and a green cross", "annotations": [{"polygon": [[0, 92], [71, 102], [66, 130], [0, 121]], "text": "WAY", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "WAY", "recog_valid": true, "glyph_recog_text": "WAY", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250226.jpg", "caption": "a group of airplanes on the tarmac at an airport", "annotations": [{"polygon": [[472, 410], [512, 413], [512, 448], [470, 448]], "text": "75", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "75", "recog_valid": true, "glyph_recog_text": "75", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184697.jpg", "caption": "a soccer game with players in yellow and red", "annotations": [{"polygon": [[297, 117], [297, 147], [359, 143], [363, 139], [362, 131], [314, 117]], "text": "Photo", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "boto", "recog_valid": false, "glyph_recog_text": "Photo", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[368, 116], [361, 145], [375, 148], [436, 142], [434, 130], [424, 116]], "text": "Stock", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Stock", "recog_valid": true, "glyph_recog_text": "Stock", "glyph_recog_ld": 1.0}, {"polygon": [[364, 98], [365, 128], [486, 127], [482, 98]], "text": "IZHAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "IZHAR", "recog_valid": true, "glyph_recog_text": "IZHAR", "glyph_recog_ld": 1.0}, {"polygon": [[265, 98], [266, 128], [357, 128], [351, 100]], "text": "IZAR", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "IZAR", "recog_valid": true, "glyph_recog_text": "IZAR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512380.jpg", "caption": "three baseball players standing on a baseball field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479621.jpg", "caption": "a man and a child on a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000446853.jpg", "caption": "an old airplane with a cartoon on it", "annotations": [{"polygon": [[340, 332], [411, 385], [396, 398], [324, 347]], "text": "UNITED", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "C3LINN", "recog_valid": false, "glyph_recog_text": "UNITED", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[76, 135], [136, 182], [119, 191], [61, 141]], "text": "NC3000I", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "100083N", "recog_valid": false, "glyph_recog_text": "NC30001", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053640.jpg", "caption": "a raven perched on a sign near the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053643.jpg", "caption": "a table with many bottles of beer and glasses", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545191.jpg", "caption": "a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000151979.jpg", "caption": "a woman holding a tennis racket on a tennis court", "annotations": [{"polygon": [[176, 125], [175, 203], [339, 203], [339, 130]], "text": "BNP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "SNP", "recog_valid": false, "glyph_recog_text": "BNP", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[369, 128], [368, 203], [510, 202], [505, 131]], "text": "AR", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "AR", "recog_valid": true, "glyph_recog_text": "AR", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348593.jpg", "caption": "a black microwave oven", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053702.jpg", "caption": "a man playing tennis with a racquet", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348639.jpg", "caption": "two trains are on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414178.jpg", "caption": "a large jetliner on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512503.jpg", "caption": "two men talking at an event in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381433.jpg", "caption": "a double decker bus driving down a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184835.jpg", "caption": "a man and a woman playing a video game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000479763.jpg", "caption": "a us air force plane parked on the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000086549.jpg", "caption": "a baseball player is swinging a bat on a field", "annotations": [{"polygon": [[248, 235], [256, 231], [260, 230], [269, 246], [275, 258], [279, 268], [274, 270], [263, 272]], "text": "9", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O>", "recog_valid": false, "glyph_recog_text": "9", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184858.jpg", "caption": "a man holding a tennis racket on a tennis court", "annotations": [{"polygon": [[139, 371], [486, 464], [512, 464], [513, 437], [498, 432], [208, 355]], "text": "MONTREA", "illegibility": false, "language": "Latin", "valid": true, "pos": 8, "recog_text": "AYONTRE", "recog_valid": false, "glyph_recog_text": "MONTREA", "glyph_recog_ld": 0.5714291836725947}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381476.jpg", "caption": "a man in the air on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348719.jpg", "caption": "a disneyland bus with a disneyland theme", "annotations": [{"polygon": [[286, 273], [286, 273], [295, 277], [300, 278], [308, 278], [344, 276], [370, 276], [373, 300], [342, 301], [342, 307], [338, 309], [331, 304], [302, 304], [297, 309], [286, 308], [289, 303], [290, 282], [285, 277]], "text": "Disneyland", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Disaeulonai", "recog_valid": false, "glyph_recog_text": "Disneyland", "glyph_recog_ld": 0.5454549586773103}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184884.jpg", "caption": "a red light is on a street corner", "annotations": [{"polygon": [[157, 73], [189, 101], [189, 120], [157, 92]], "text": "TRATTORIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "TNTN", "recog_valid": false, "glyph_recog_text": "TRATTORA", "glyph_recog_ld": 0.2500009374988281}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578108.jpg", "caption": "two cats laying on a towel on a bathroom counter", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414276.jpg", "caption": "a baseball player in a uniform throwing a ball", "annotations": [{"polygon": [[207, 250], [226, 239], [242, 258], [221, 271], [215, 269]], "text": "M", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "M", "recog_valid": true, "glyph_recog_text": "M", "glyph_recog_ld": 1.0}, {"polygon": [[260, 221], [279, 211], [274, 198], [281, 197], [290, 203], [297, 206], [303, 217], [299, 222], [288, 226], [282, 231], [262, 239], [263, 240]], "text": "acle", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "acle", "recog_valid": true, "glyph_recog_text": "acle", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000184903.jpg", "caption": "a desk with a computer and a chair in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283213.jpg", "caption": "a skateboarder is doing a trick on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447074.jpg", "caption": "a bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381540.jpg", "caption": "a man in a suit and tie is posing for the camera", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283240.jpg", "caption": "a woman in a bikini riding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447084.jpg", "caption": "three boys holding a surfboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578171.jpg", "caption": "a man standing in a kitchen with a tv on", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021166.jpg", "caption": "a sign that says per smaro on a wall", "annotations": [{"polygon": [[75, 248], [188, 249], [180, 209], [78, 206], [78, 245]], "text": "PER", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PER", "recog_valid": true, "glyph_recog_text": "PER", "glyph_recog_ld": 1.0}, {"polygon": [[204, 208], [201, 250], [237, 249], [237, 209]], "text": "S.", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "S.", "recog_valid": true, "glyph_recog_text": "s", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[245, 209], [245, 250], [445, 250], [443, 209]], "text": "MARCO", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARCO", "recog_valid": true, "glyph_recog_text": "MARCO", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000053939.jpg", "caption": "a man in a red hoodie is sitting at a table with a laptop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152286.jpg", "caption": "a baseball player throwing a ball during a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000348907.jpg", "caption": "a black and white photo of a couple walking in a parking lot", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316141.jpg", "caption": "a group of people boarding a bus at a stop", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119534.jpg", "caption": "a frosted donut with blue and white sprinkles", "annotations": [{"polygon": [[67, 63], [100, 39], [142, 28], [133, 7], [112, 8], [75, 19], [62, 22]], "text": "RNOFF", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "RNOFE", "recog_valid": false, "glyph_recog_text": "RNOFF", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[114, 194], [168, 167], [178, 185], [121, 214]], "text": "Gampballs", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "6amp6elli", "recog_valid": false, "glyph_recog_text": "Ganphalis", "glyph_recog_ld": 0.333334074073251}, {"polygon": [[167, 114], [242, 79], [253, 104], [178, 137]], "text": "CURLY'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "CURLY'S", "recog_valid": true, "glyph_recog_text": "CURLY'S", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021235.jpg", "caption": "a man water skiing on a lake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512757.jpg", "caption": "a stop sign with a palm tree in the background", "annotations": [{"polygon": [[220, 350], [395, 286], [388, 228], [364, 215], [192, 282]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316152.jpg", "caption": "a red and white bus driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381694.jpg", "caption": "a crowded street with many people holding umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119551.jpg", "caption": "a large airplane parked on the tarmac with people around it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119552.jpg", "caption": "a train on the tracks in the fog", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414463.jpg", "caption": "a woman in a hat talking on a cell phone", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414477.jpg", "caption": "a red and yellow double decker bus", "annotations": [{"polygon": [[218, 195], [218, 195], [217, 210], [268, 195], [270, 180]], "text": "pilkingtonbus", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "pikwigtonbus", "recog_valid": false, "glyph_recog_text": "ciRkingbontsu", "glyph_recog_ld": 0.3846158579878015}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480022.jpg", "caption": "a woman in a pink dress is standing in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000381729.jpg", "caption": "a zebra eating grass in front of a circus tent", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054052.jpg", "caption": "a desk with a computer, a dvd, a remote control, and a game", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316238.jpg", "caption": "a group of people cutting a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217938.jpg", "caption": "a woman and two children in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283476.jpg", "caption": "a pan with vegetables and tofu in it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283485.jpg", "caption": "a bus driving down a street with people walking around", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152429.jpg", "caption": "a baseball player swinging a bat at a ball", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250737.jpg", "caption": "a police officer on a motorcycle", "annotations": [{"polygon": [[235, 155], [254, 145], [274, 141], [286, 140], [303, 143], [321, 153], [338, 166], [342, 161], [329, 148], [309, 138], [284, 131], [265, 134], [241, 141], [233, 147]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": "POLICE", "glyph_recog_ld": 1.6666638887885554e-06}, {"polygon": [[315, 267], [342, 251], [355, 240], [364, 230], [370, 217], [366, 213], [356, 227], [344, 239], [315, 256], [310, 259]], "text": "POLICE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "POLC", "recog_valid": false, "glyph_recog_text": "POLICE", "glyph_recog_ld": 0.6666672222212963}, {"polygon": [[322, 296], [322, 303], [325, 301], [329, 298], [335, 293], [340, 289], [346, 282], [352, 274], [354, 271], [354, 266], [352, 266], [349, 271], [345, 275], [340, 281], [334, 286], [328, 292]], "text": "ELECTRIC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ELAO", "recog_valid": false, "glyph_recog_text": "注格女:安", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152436.jpg", "caption": "a street sign with a blue sky in the background", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480116.jpg", "caption": "a man on a skateboard", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000217974.jpg", "caption": "a person riding skis down a snowy slope", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000512895.jpg", "caption": "a lufthansa airbus a320-214", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000578459.jpg", "caption": "a group of children sitting at a table with a birthday cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447432.jpg", "caption": "the chrysler building in new york city", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185291.jpg", "caption": "a man standing next to a cow at an event", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000119760.jpg", "caption": "a living room with a couch, television and a fireplace", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283600.jpg", "caption": "a baseball game with a batter, catcher, and umpire", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250834.jpg", "caption": "a street sign on the side of a highway at night", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087027.jpg", "caption": "a group of people playing frisbee", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250877.jpg", "caption": "a person doing a trick on skis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349204.jpg", "caption": "a red tow truck is towing a red double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021531.jpg", "caption": "a pizza on a table", "annotations": [{"polygon": [[77, 188], [136, 202], [114, 265], [62, 250]], "text": "42", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "42", "recog_valid": true, "glyph_recog_text": "¥", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000447520.jpg", "caption": "a dog sitting on a counter", "annotations": [{"polygon": [[429, 382], [467, 365], [476, 377], [435, 399]], "text": "Cycrolu 2", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Cycrolu", "recog_valid": false, "glyph_recog_text": "Cycrolu2", "glyph_recog_ld": 0.8750001562498047}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218147.jpg", "caption": "a pizza on a plate", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152650.jpg", "caption": "a man sitting on a ledge", "annotations": [{"polygon": [[241, 193], [225, 211], [223, 260], [237, 256], [276, 240], [333, 223], [334, 221], [323, 206], [261, 212], [259, 200], [251, 195], [242, 193]], "text": "Super", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "See", "recog_valid": false, "glyph_recog_text": "Super", "glyph_recog_ld": 0.4000011999976}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000250991.jpg", "caption": "a man and woman riding bicycles on a city street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054385.jpg", "caption": "a hot dog in a styrofoam container", "annotations": [{"polygon": [[144, 60], [154, 142], [181, 142], [169, 61]], "text": "CocaCola", "illegibility": false, "language": "Latin", "valid": true, "pos": 1, "recog_text": "W509)", "recog_valid": false, "glyph_recog_text": "Oouc", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513174.jpg", "caption": "a horse and rider jumping over an obstacle", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382104.jpg", "caption": "a police officer riding a motorcycle down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316575.jpg", "caption": "a stop sign is shown in the middle of a road", "annotations": [{"polygon": [[183, 262], [312, 265], [313, 321], [184, 322]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[263, 272], [258, 266], [290, 242], [294, 249]], "text": "CORE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CORE", "recog_valid": true, "glyph_recog_text": "于学北点", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000545953.jpg", "caption": "a red double decker bus driving down a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087207.jpg", "caption": "a city street at night", "annotations": [{"polygon": [[104, 171], [98, 199], [123, 215], [130, 184]], "text": "MY", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "三", "recog_valid": false, "glyph_recog_text": "人内", "glyph_recog_ld": 0.5000024999875001}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382127.jpg", "caption": "a clock and a wall clock are hanging on a wall", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054448.jpg", "caption": "a truck is parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283835.jpg", "caption": "a young boy in a baseball uniform", "annotations": [{"polygon": [[0, 42], [98, 42], [102, 73], [0, 85]], "text": "NY", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "NY", "recog_valid": true, "glyph_recog_text": "NY", "glyph_recog_ld": 1.0}, {"polygon": [[0, 97], [168, 76], [179, 135], [0, 159]], "text": "LEV", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "EEV", "recog_valid": false, "glyph_recog_text": "LEV", "glyph_recog_ld": 0.6666677777740742}, {"polygon": [[0, 205], [165, 184], [178, 245], [0, 270]], "text": "HIL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "PHIL", "recog_valid": false, "glyph_recog_text": "HIL", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[0, 279], [167, 259], [174, 320], [-1, 346]], "text": "STL", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "STL", "recog_valid": true, "glyph_recog_text": "STL", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218307.jpg", "caption": "a yellow and black train engine sitting on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414918.jpg", "caption": "a black suv parked on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349390.jpg", "caption": "a group of people standing in a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120032.jpg", "caption": "a red stop sign", "annotations": [{"polygon": [[132, 304], [398, 289], [412, 225], [124, 232]], "text": "STOP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "STOP", "recog_valid": true, "glyph_recog_text": "STOP", "glyph_recog_ld": 1.0}, {"polygon": [[323, 438], [350, 407], [362, 415], [334, 448]], "text": "CENTER", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "CENTER", "recog_valid": true, "glyph_recog_text": "CEMTER", "glyph_recog_ld": 0.8333336111106482}, {"polygon": [[186, 394], [198, 414], [218, 396], [220, 395], [209, 376]], "text": "HOT", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "HON", "recog_valid": false, "glyph_recog_text": "HOT", "glyph_recog_ld": 0.6666677777740742}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480486.jpg", "caption": "a display case filled with many different kinds of donuts", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283885.jpg", "caption": "a blue and white train on tracks near a forest", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120044.jpg", "caption": "a group of people in a kitchen preparing food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185587.jpg", "caption": "two men are playing a video game in a living room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316667.jpg", "caption": "a skateboarder doing a trick in a bowling alley", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000414975.jpg", "caption": "a man sitting on a red bag", "annotations": [{"polygon": [[224, 156], [300, 153], [300, 179], [297, 191], [292, 201], [277, 205], [270, 199], [268, 180], [268, 178], [260, 187], [260, 187], [250, 187], [250, 187], [248, 207], [227, 206], [227, 206]], "text": "PG", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PG", "recog_valid": true, "glyph_recog_text": "PG", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218379.jpg", "caption": "a man sitting on a bench playing a guitar", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000152844.jpg", "caption": "a soccer game is being played on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000185623.jpg", "caption": "a hot dog and soda on a tray", "annotations": [{"polygon": [[439, 214], [446, 265], [494, 253], [509, 246], [513, 227], [512, 214], [482, 227], [476, 196]], "text": "PDR PEPP", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "P品", "recog_valid": false, "glyph_recog_text": "one", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[479, 198], [483, 225], [510, 214], [508, 187]], "text": "Dr", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Dr", "recog_valid": true, "glyph_recog_text": "Dr", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382251.jpg", "caption": "a plate with two cups of coffee and a cake", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480575.jpg", "caption": "a group of people standing in the rain with umbrellas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000283970.jpg", "caption": "a black and white photo of a polar bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480587.jpg", "caption": "a group of people riding motorcycles on a street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513361.jpg", "caption": "a street sign on a building with a street lamp", "annotations": [{"polygon": [[207, 303], [208, 258], [240, 266], [239, 306]], "text": "P", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "O2", "recog_valid": false, "glyph_recog_text": "a", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546130.jpg", "caption": "a baseball player swinging a bat at a ball", "annotations": [{"polygon": [[263, 328], [273, 321], [287, 317], [309, 313], [321, 314], [333, 314], [326, 335], [298, 334], [280, 337], [261, 347], [261, 332]], "text": "CABRERA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CABRERA", "recog_valid": true, "glyph_recog_text": "CABRERA", "glyph_recog_ld": 1.0}, {"polygon": [[267, 350], [281, 347], [317, 341], [322, 368], [321, 377], [321, 393], [282, 396], [262, 401], [262, 362]], "text": "24", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "24", "recog_valid": true, "glyph_recog_text": "24", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251230.jpg", "caption": "a blue and yellow bus parked in front of a building", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284008.jpg", "caption": "a plate with a sandwich and a glass of juice", "annotations": [{"polygon": [[352, 249], [351, 286], [457, 295], [455, 256]], "text": "Fifty", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Fifiy", "recog_valid": false, "glyph_recog_text": "Fifty", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[340, 274], [339, 308], [474, 320], [470, 290]], "text": "Shades", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Shades", "recog_valid": true, "glyph_recog_text": "Shades", "glyph_recog_ld": 1.0}, {"polygon": [[441, 308], [399, 304], [398, 338], [442, 341]], "text": "of", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "可", "recog_valid": false, "glyph_recog_text": "of", "glyph_recog_ld": 4.99997500014171e-06}, {"polygon": [[304, 343], [313, 382], [325, 379], [353, 367], [401, 360], [444, 356], [490, 352], [502, 345], [502, 335], [473, 330], [448, 326], [442, 340], [416, 339], [365, 340], [365, 329], [331, 334]], "text": "Feminism", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Femmicn", "recog_valid": false, "glyph_recog_text": "Feminism", "glyph_recog_ld": 0.5000006249992187}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316777.jpg", "caption": "a red double decker bus parked on the side of the road", "annotations": [{"polygon": [[201, 158], [210, 150], [220, 146], [235, 140], [237, 141], [236, 159], [203, 172], [203, 172]], "text": "global", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "global", "recog_valid": true, "glyph_recog_text": "global", "glyph_recog_ld": 1.0}, {"polygon": [[349, 92], [430, 64], [452, 64], [452, 95], [346, 127]], "text": "NEON", "illegibility": false, "language": "Latin", "valid": true, "pos": 2, "recog_text": "neon", "recog_valid": false, "glyph_recog_text": "NEON", "glyph_recog_ld": 2.4999937499048386e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021883.jpg", "caption": "a man with a beard and a purple hat on a cell phone", "annotations": [{"polygon": [[292, 153], [285, 179], [239, 169], [205, 175], [215, 148], [254, 141]], "text": "LSU", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "1Su", "recog_valid": false, "glyph_recog_text": "LSU", "glyph_recog_ld": 0.3333355555481482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546183.jpg", "caption": "a baseball player throwing a ball", "annotations": [{"polygon": [[274, 196], [285, 201], [298, 207], [313, 211], [308, 236], [273, 212]], "text": "GIA", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GIA", "recog_valid": true, "glyph_recog_text": "GIA", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251283.jpg", "caption": "a large airplane and helicopter on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480666.jpg", "caption": "two boats are sailing in the water", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546211.jpg", "caption": "a person standing in the snow with skis and poles", "annotations": [{"polygon": [[56, 380], [56, 380], [53, 430], [381, 431], [381, 379]], "text": "Dutchsimba", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "DutdhSimba", "recog_valid": false, "glyph_recog_text": "Dutchsimba", "glyph_recog_ld": 0.8000001999998}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480676.jpg", "caption": "betty ann's exit, cape cod, massachusetts", "annotations": [{"polygon": [[317, 120], [319, 170], [404, 170], [403, 124]], "text": "BETTY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "BETTY", "recog_valid": true, "glyph_recog_text": "BETTY", "glyph_recog_ld": 1.0}, {"polygon": [[322, 172], [323, 221], [404, 220], [402, 170]], "text": "ANN'S", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ANRNT'S", "recog_valid": false, "glyph_recog_text": "ANN'S", "glyph_recog_ld": 0.7142861224483965}, {"polygon": [[325, 243], [326, 277], [409, 275], [409, 243]], "text": "EXIT", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "EXIT", "recog_valid": true, "glyph_recog_text": "EXIT", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382383.jpg", "caption": "a young boy riding a surfboard in the ocean", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251315.jpg", "caption": "a modern bedroom with a bed, dresser and a large window", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218558.jpg", "caption": "a table with a sandwich and fries on it", "annotations": [{"polygon": [[463, 317], [447, 346], [460, 341], [502, 325], [512, 314], [511, 297]], "text": "7up", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LTOP", "recog_valid": false, "glyph_recog_text": "7 u y", "glyph_recog_ld": 1.9999959999239536e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382400.jpg", "caption": "a plane with a red wing", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316867.jpg", "caption": "a man on a motorcycle with luggage sitting on the side of the road", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000021968.jpg", "caption": "a woman is standing in front of a food stand", "annotations": [{"polygon": [[335, 164], [409, 125], [418, 139], [415, 164], [345, 204]], "text": "FROG", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FROO", "recog_valid": false, "glyph_recog_text": "FROG", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[418, 125], [460, 122], [482, 131], [468, 172], [420, 160]], "text": "LEGS", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "LECS", "recog_valid": false, "glyph_recog_text": "LEGS", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[345, 302], [411, 264], [412, 281], [382, 303], [352, 318]], "text": "TOTALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "TOTALLY", "recog_valid": true, "glyph_recog_text": "TOTALLY", "glyph_recog_ld": 1.0}, {"polygon": [[416, 255], [455, 232], [459, 238], [455, 251], [417, 275]], "text": "FRIED", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "FRIED", "recog_valid": true, "glyph_recog_text": "FRIED", "glyph_recog_ld": 1.0}, {"polygon": [[4, 248], [118, 192], [169, 183], [172, 218], [86, 243], [20, 287]], "text": "ZUCCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "ZUCCI", "recog_valid": false, "glyph_recog_text": "ZUCCH", "glyph_recog_ld": 0.8000003999992}, {"polygon": [[66, 262], [86, 246], [171, 220], [169, 259], [69, 291]], "text": "CURD", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "CUR", "recog_valid": false, "glyph_recog_text": "CURD", "glyph_recog_ld": 0.7500006249984374}, {"polygon": [[46, 410], [100, 391], [155, 354], [154, 378], [97, 415], [60, 427]], "text": "TOTALLY", "illegibility": false, "language": "Latin", "valid": true, "pos": 6, "recog_text": "TOTALLY", "recog_valid": true, "glyph_recog_text": "TOTALLY", "glyph_recog_ld": 1.0}, {"polygon": [[40, 35], [66, 28], [134, 30], [139, 38], [134, 52], [70, 50], [43, 59]], "text": "AFFLES", "illegibility": false, "language": "Latin", "valid": true, "pos": 0, "recog_text": "AFFLFS", "recog_valid": false, "glyph_recog_text": "AFFLES", "glyph_recog_ld": 0.8333336111106482}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120320.jpg", "caption": "a man is fixing a sink in a kitchen", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218628.jpg", "caption": "a clock and a panda bear", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382476.jpg", "caption": "an old black and white photo of people at a train station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218648.jpg", "caption": "a large truck driving down the street", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000316971.jpg", "caption": "two women playing tennis on a tennis court", "annotations": [{"polygon": [[413, 213], [424, 236], [483, 215], [469, 192]], "text": "p", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "", "recog_valid": false, "glyph_recog_text": "p", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000087602.jpg", "caption": "a baseball player in blue and yellow uniform holding a bat", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000153159.jpg", "caption": "a group of people skateboarding in a skate park", "annotations": [{"polygon": [[407, 173], [462, 169], [511, 167], [509, 133], [440, 137], [429, 130], [407, 131]], "text": "Pam", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "Pam", "recog_valid": true, "glyph_recog_text": "Pam", "glyph_recog_ld": 1.0}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546379.jpg", "caption": "a sandwich and tea on a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054868.jpg", "caption": "a woman is playing tennis on a court", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000480852.jpg", "caption": "a baseball player holding a bat on a field", "annotations": [{"polygon": [[220, 199], [240, 204], [258, 224], [254, 241], [232, 226], [223, 212]], "text": "Ph", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "QJS", "recog_valid": false, "glyph_recog_text": "PNh", "glyph_recog_ld": 3.3333222222431402e-06}, {"polygon": [[113, 247], [209, 248], [209, 204], [106, 204]], "text": "PNC", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "PNC", "recog_valid": true, "glyph_recog_text": "PNC", "glyph_recog_ld": 1.0}, {"polygon": [[215, 207], [325, 203], [328, 247], [217, 246]], "text": "11", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "B", "recog_valid": false, "glyph_recog_text": "11", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000349793.jpg", "caption": "a man sitting on a motorcycle drinking from a cup", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000382569.jpg", "caption": "a group of people standing on a skateboard ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054949.jpg", "caption": "a man in white playing tennis", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284340.jpg", "caption": "a black and white photo of a double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000054974.jpg", "caption": "a b-17 bomber on the tarmac", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284362.jpg", "caption": "a group of people on a boat with an american flag", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218830.jpg", "caption": "a yellow fire hydrant on a sidewalk", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000546515.jpg", "caption": "a train on the tracks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415464.jpg", "caption": "a baseball player holding a bat on a field", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000022256.jpg", "caption": "a kitchen with pots and pans hanging from the ceiling", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055033.jpg", "caption": "a white and black refrigerator", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186109.jpg", "caption": "a rugby player is running with the ball", "annotations": [{"polygon": [[66, 317], [51, 312], [74, 268], [88, 272]], "text": "2", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "IN", "recog_valid": false, "glyph_recog_text": "2", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186112.jpg", "caption": "a cat standing on a chair to watch a tv", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000218917.jpg", "caption": "a display case with many donuts and other food", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481073.jpg", "caption": "a stop sign and street signs on a pole", "annotations": [{"polygon": [[105, 136], [105, 136], [143, 127], [144, 155], [106, 161]], "text": "LIVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 3, "recog_text": "LIVE", "recog_valid": true, "glyph_recog_text": "LIVE", "glyph_recog_ld": 1.0}, {"polygon": [[149, 125], [191, 121], [194, 152], [153, 154]], "text": "OAK", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "OAK", "recog_valid": true, "glyph_recog_text": "OAK", "glyph_recog_ld": 1.0}, {"polygon": [[249, 117], [247, 146], [281, 156], [285, 132]], "text": "GLASS", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "GLANN", "recog_valid": false, "glyph_recog_text": "GLASS", "glyph_recog_ld": 0.6000007999984}, {"polygon": [[286, 131], [281, 158], [319, 171], [322, 146]], "text": "S CANYON", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "CANYON", "recog_valid": false, "glyph_recog_text": "stoAKRN", "glyph_recog_ld": 0.14285836734518942}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284468.jpg", "caption": "three men wearing ties standing next to each other", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186171.jpg", "caption": "a small plane flying in the sky with propellers", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415548.jpg", "caption": "a man sitting on a chair", "annotations": [{"polygon": [[168, 325], [202, 372], [205, 371], [206, 371], [171, 324]], "text": "Contact", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "", "recog_valid": false, "glyph_recog_text": ".......", "glyph_recog_ld": 1.4285693876736616e-06}, {"polygon": [[173, 322], [209, 373], [214, 368], [205, 359], [202, 353], [201, 344], [184, 317]], "text": "Contact", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "Contas", "recog_valid": false, "glyph_recog_text": "Contact", "glyph_recog_ld": 0.7142861224483965}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000579394.jpg", "caption": "a blue train traveling down the tracks in a rural area", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513861.jpg", "caption": "a baseball player in a white uniform", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513867.jpg", "caption": "a blue double decker bus", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000513881.jpg", "caption": "a silver horse statue on a table in a dining room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317283.jpg", "caption": "a street sign and a traffic light on a pole", "annotations": [{"polygon": [[335, 445], [336, 447], [358, 448], [394, 430], [390, 405]], "text": "SPAGHETTO", "illegibility": false, "language": "Latin", "valid": true, "pos": 7, "recog_text": "WIGHETTO", "recog_valid": false, "glyph_recog_text": "SRAGHEYTO", "glyph_recog_ld": 0.5555560493821674}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481129.jpg", "caption": "a black and white photo of several airplanes parked on the runway", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120683.jpg", "caption": "a street sign with two street signs on it", "annotations": [{"polygon": [[202, 163], [202, 183], [266, 157], [265, 133]], "text": "3000", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "3000", "recog_valid": true, "glyph_recog_text": "3000", "glyph_recog_ld": 1.0}, {"polygon": [[148, 232], [154, 271], [331, 205], [325, 165]], "text": "Stevens", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "stevens", "recog_valid": false, "glyph_recog_text": "Stevens", "glyph_recog_ld": 0.8571430612241983}, {"polygon": [[355, 164], [390, 149], [395, 180], [357, 193]], "text": "S T", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "ST", "recog_valid": false, "glyph_recog_text": "ST", "glyph_recog_ld": 1.0}, {"polygon": [[245, 331], [245, 304], [294, 324], [295, 350]], "text": "AVE", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "AVE", "recog_valid": true, "glyph_recog_text": "AVE", "glyph_recog_ld": 1.0}, {"polygon": [[189, 270], [196, 327], [212, 330], [225, 283]], "text": "7", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "厂", "recog_valid": false, "glyph_recog_text": "L", "glyph_recog_ld": 9.999900001056439e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000317322.jpg", "caption": "a train on the tracks at a station", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000251804.jpg", "caption": "a boy eating pizza at a table", "annotations": [{"polygon": [[282, 345], [302, 341], [321, 337], [331, 336], [351, 336], [355, 336], [356, 366], [335, 367], [314, 369], [295, 372], [287, 375]], "text": "AP", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "D", "recog_valid": false, "glyph_recog_text": "AP", "glyph_recog_ld": 4.99997500014171e-06}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000120743.jpg", "caption": "a model of a highway with cars and trucks", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415660.jpg", "caption": "a blue train with the words do not hump on it", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000055237.jpg", "caption": "a group of people sitting at a table", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000415694.jpg", "caption": "a desk with a computer and a chair in a room", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000088034.jpg", "caption": "a man and a woman are standing in front of a table with several pizzas", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000186338.jpg", "caption": "a boy riding a skateboard on a ramp", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000481251.jpg", "caption": "a red double decker bus driving down a street", "annotations": [{"polygon": [[441, 212], [439, 237], [469, 235], [487, 235], [512, 232], [510, 206]], "text": "BARCL", "illegibility": false, "language": "Latin", "valid": true, "pos": 5, "recog_text": "BARCI", "recog_valid": false, "glyph_recog_text": "BARCL", "glyph_recog_ld": 0.8000003999992}], "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000284649.jpg", "caption": "a clay sculpture of a woman standing in the grass", "width": 512, "height": 512}, {"img_name": "COCO_train2014_000000219114.jpg", "caption": "a black sign that says march on it", "annotations": [{"polygon": [[191, 159], [233, 157], [234, 333], [192, 326]], "text": "MARCH", "illegibility": false, "language": "Latin", "valid": true, "pos": 4, "recog_text": "MARCH", "recog_valid": true, "glyph_recog_text": "2